The Descriptor Protocol
When Python does an attribute lookup obj.attr, it follows these rules (simplified):
- Look in the class dicts along type(obj).__mro__ for a data descriptor (defines __set__ and/or __delete__, usually alongside __get__). If found, call its __get__.
- Look in obj.__dict__. If found, return it.
- Look in the class dicts for a non-data descriptor (only __get__). If found, call its __get__. A plain class attribute found here is returned as is.
- Raise AttributeError.
class Descriptor:
    """Tracing data descriptor that announces every protocol call it receives.

    Instance reads always yield 42; writes and deletes are only logged,
    nothing is ever stored.
    """

    def __delete__(self, obj):
        """Log a ``del obj.attr`` request."""
        print(f'__delete__ called: obj={obj!r}')

    def __set__(self, obj, value):
        """Log an ``obj.attr = value`` request without keeping the value."""
        print(f'__set__ called: obj={obj!r}, value={value!r}')

    def __get__(self, obj, objtype=None):
        """Log the read, then return the descriptor itself or the constant 42."""
        print(f'__get__ called: obj={obj!r}, objtype={objtype}')
        # obj is None when the attribute is reached through the class itself.
        return self if obj is None else 42
class MyClass:
    """Host class whose ``attr`` is managed by a Descriptor."""

    # The descriptor has to sit in the class namespace to take effect.
    attr = Descriptor()


obj = MyClass()

looked_up = obj.attr    # attribute read -> __get__
print(looked_up)
obj.attr = 100          # attribute write -> __set__
del obj.attr            # attribute delete -> __delete__

via_class = MyClass.attr  # class-level read -> __get__ with obj=None
print(via_class)
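__get__ called: obj=<__main__.MyClass object at 0x...>, objtype=<class '__main__.MyClass'>
42
__set__ called: obj=<__main__.MyClass object at 0x...>, value=100
__delete__ called: obj=<__main__.MyClass object at 0x...>
__get__ called: obj=None, objtype=<class '__main__.MyClass'>
<__main__.Descriptor object at 0x...>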
The descriptor must be a class attribute - storing a descriptor in an instance dict does not activate the protocol:
obj = MyClass()

# A descriptor placed in the *instance* namespace is an ordinary value:
# the protocol is only consulted for objects found on the class.
vars(obj)['desc'] = Descriptor()
print(obj.desc)  # prints the Descriptor object itself; __get__ is never called
Data vs Non-Data Descriptors
The difference controls whether an instance __dict__ entry can shadow the descriptor:
class NonDataDescriptor:
    """Read-only-protocol descriptor: it defines ``__get__`` and nothing else.

    Because there is no ``__set__``, an entry of the same name in the
    instance ``__dict__`` takes precedence over it.
    """

    def __get__(self, obj, objtype=None):
        """Return the fixed marker string for every lookup."""
        marker = 'from descriptor'
        return marker
class DataDescriptor:
    """Descriptor with both ``__get__`` and ``__set__``, so it beats the instance dict."""

    def __set__(self, obj, value):
        """Accept and discard the value.

        The method body is irrelevant; merely defining ``__set__`` is what
        makes this a data descriptor.
        """
        return None

    def __get__(self, obj, objtype=None):
        """Return the fixed marker string for every lookup."""
        return 'from descriptor'
class A:
    """Carries one descriptor of each flavour for the precedence comparison."""

    non_data = NonDataDescriptor()
    data = DataDescriptor()


a = A()
instance_ns = vars(a)  # the same object as a.__dict__

# Non-data descriptor: the instance entry is found first.
instance_ns['non_data'] = 'from instance'
print(a.non_data)  # 'from instance' - instance dict shadows descriptor

# Data descriptor: the class-level descriptor is consulted before the instance dict.
instance_ns['data'] = 'from instance'
print(a.data)  # 'from descriptor' - data descriptor takes priority
Functions are non-data descriptors - that's how methods work:
def greet(self):
    """Return a greeting that embeds the object passed as *self*."""
    return 'Hello from {}'.format(self)
# Plain functions implement __get__ but not __set__: they are non-data descriptors.
has_get = hasattr(greet, '__get__')
has_set = hasattr(greet, '__set__')
print(has_get)  # True
print(has_set)  # False - non-data descriptor


class Dog:
    """Class whose ``bark`` method is the module-level function ``greet``."""

    bark = greet  # an ordinary function stored in the class dict


d = Dog()

# The lookup d.bark runs greet.__get__(d, Dog), which produces a bound method.
bound_bark = d.bark
print(bound_bark())  # Hello from <__main__.Dog object at 0x...>

# Functions are non-data descriptors, so an instance attribute can shadow the method.
d.bark = lambda: 'custom bark'
print(d.bark())  # custom bark
How property Works
property is a data descriptor built into Python. We can reimplement it to understand its internals:
class MyProperty:
    """Pure-Python re-implementation of the built-in ``property`` data descriptor.

    Args:
        fget: Function called as ``fget(obj)`` to read the attribute.
        fset: Function called as ``fset(obj, value)`` to write it.
        fdel: Function called as ``fdel(obj)`` to delete it.
        doc: Docstring for the property. When ``None`` the docstring of
            ``fget`` (if any) is used instead.
    """

    def __init__(self, fget=None, fset=None, fdel=None, doc=None):
        self.fget = fget
        self.fset = fset
        self.fdel = fdel
        # Keep the caller-supplied value separately so that copies made by
        # getter()/setter()/deleter() can tell an explicit docstring from
        # one that was merely borrowed from fget.
        self._explicit_doc = doc
        # ``is None`` (not truthiness) so an explicit empty docstring is kept.
        if doc is None and fget is not None:
            doc = getattr(fget, '__doc__', None)
        self.__doc__ = doc

    def __get__(self, obj, objtype=None):
        """Return ``fget(obj)``, or the descriptor itself for class-level access.

        Raises:
            AttributeError: If no getter was supplied.
        """
        if obj is None:
            return self  # class-level access returns the descriptor
        if self.fget is None:
            raise AttributeError('unreadable attribute')
        return self.fget(obj)

    def __set__(self, obj, value):
        """Call ``fset(obj, value)``.

        Raises:
            AttributeError: If no setter was supplied.
        """
        if self.fset is None:
            raise AttributeError("can't set attribute")
        self.fset(obj, value)

    def __delete__(self, obj):
        """Call ``fdel(obj)``.

        Raises:
            AttributeError: If no deleter was supplied.
        """
        if self.fdel is None:
            raise AttributeError("can't delete attribute")
        self.fdel(obj)

    def _replace(self, fget, fset, fdel):
        """Return a copy of the same concrete type with the given accessors."""
        # type(self) rather than MyProperty, so subclasses survive the
        # @prop.setter / @prop.deleter decorator chain.
        return type(self)(fget, fset, fdel, self._explicit_doc)

    def getter(self, fget):
        """Return a copy of this property that uses *fget* as its getter."""
        return self._replace(fget, self.fset, self.fdel)

    def setter(self, fset):
        """Return a copy of this property that uses *fset* as its setter."""
        return self._replace(self.fget, fset, self.fdel)

    def deleter(self, fdel):
        """Return a copy of this property that uses *fdel* as its deleter."""
        return self._replace(self.fget, self.fset, fdel)
class Circle:
    """Circle whose ``radius`` is exposed through a MyProperty with a validating setter."""

    def __init__(self, radius):
        # Written straight to the backing attribute; the setter is not involved here.
        self._radius = radius

    @MyProperty
    def radius(self):
        """Return the stored radius."""
        return self._radius

    @radius.setter
    def radius(self, value):
        """Store *value* as the radius, rejecting negative numbers.

        Raises:
            ValueError: If *value* is below zero.
        """
        if value < 0:
            raise ValueError('radius cannot be negative')
        else:
            self._radius = value
c = Circle(radius=5)
print(c.radius)  # 5  (read goes through MyProperty.__get__)

c.radius = 10    # write goes through MyProperty.__set__ and the validating setter
print(c.radius)  # 10
Reusable Descriptors
The real power of descriptors is reuse. A Validated descriptor can enforce type and range constraints on any number of attributes:
class Validated:
    """Reusable data descriptor that validates values before storing them.

    The value is kept on the instance under ``'_' + <attribute name>``.
    Subclasses override ``validate`` and may override ``default``.
    """

    # Returned by __get__ while nothing has been assigned on the instance.
    # Defined on the base class so that a plain Validated() is usable too.
    default = None

    def __set_name__(self, owner, name):
        """Record the attribute name this descriptor was bound to in *owner*."""
        self.public_name = name
        self.private_name = '_' + name  # store in instance dict under different key

    def __get__(self, obj, objtype=None):
        """Return the stored value, ``default`` if unset, or self for class access."""
        if obj is None:
            return self
        return getattr(obj, self.private_name, self.default)

    def __set__(self, obj, value):
        """Validate *value* and store the result on *obj*."""
        value = self.validate(value)
        setattr(obj, self.private_name, value)

    def validate(self, value):
        """Return the value to store; the base implementation accepts anything."""
        return value  # subclasses override
class PositiveInt(Validated):
    """Validated attribute that only accepts ``int`` values greater than zero."""

    default = 0

    def validate(self, value):
        """Return *value* unchanged if it is a positive int.

        Raises:
            TypeError: If *value* is not an ``int``.
            ValueError: If *value* is zero or negative.
        """
        if isinstance(value, int):
            if value <= 0:
                raise ValueError(f'{self.public_name} must be positive')
            return value
        raise TypeError(f'{self.public_name} must be an int')
class NonEmptyStr(Validated):
    """Validated attribute that only accepts strings with non-whitespace content."""

    default = ''

    def validate(self, value):
        """Return *value* with surrounding whitespace removed.

        Raises:
            TypeError: If *value* is not a ``str``.
            ValueError: If nothing is left after stripping whitespace.
        """
        if isinstance(value, str):
            trimmed = value.strip()
            if not trimmed:
                raise ValueError(f'{self.public_name} cannot be empty')
            return trimmed
        raise TypeError(f'{self.public_name} must be a str')
class Product:
    """Record whose three fields are each guarded by a validating descriptor."""

    name = NonEmptyStr()
    price = PositiveInt()
    quantity = PositiveInt()

    def __init__(self, name, price, quantity):
        # Every assignment is routed through the matching descriptor's
        # __set__, so invalid arguments raise here. Targets are assigned
        # left to right: name, then price, then quantity.
        self.name, self.price, self.quantity = name, price, quantity
p = Product(name='Widget', price=999, quantity=50)
print(p.name, p.price)  # Widget 999
__set_name__
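Added in Python 3.6, __set_name__(self, owner, name) is called automatically when the owning class is created, once for each descriptor assigned in the class body. It gives the descriptor access to the attribute name without requiring the name to be passed explicitly. (It is not called for a descriptor that is attached to a class after the class has been created.)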
class LoggedAttr:
    """Data descriptor that logs its installation and every assignment.

    Values live in the instance ``__dict__`` under the attribute's own name.
    """

    def __set_name__(self, owner, name):
        """Remember the attribute name and report where the descriptor was installed."""
        self.name = name
        print(f'LoggedAttr installed as {owner.__name__}.{name}')

    def __set__(self, obj, value):
        """Log the assignment, then store *value* in the instance dict."""
        print(f'Setting {self.name} = {value!r}')
        vars(obj)[self.name] = value

    def __get__(self, obj, objtype=None):
        """Return the stored value (``None`` if unset), or self for class access."""
        return self if obj is None else vars(obj).get(self.name)
class Config:
    """Settings holder; each field reports its own installation and assignments."""

    # __set_name__ runs once per descriptor when this class body finishes executing.
    host = LoggedAttr()  # receives name='host'
    port = LoggedAttr()  # receives name='port'


c = Config()
c.host = 'localhost'
c.port = 8080

host_value, port_value = c.host, c.port
print(host_value, port_value)
LoggedAttr installed as Config.host
LoggedAttr installed as Config.port
Setting host = 'localhost'
Setting port = 8080
localhost 8080
__slots__
__slots__ replaces per-instance __dict__ with a fixed set of slot descriptors. Each slot is actually a descriptor in the class dict:
import sys
class WithDict:
    """Ordinary class: instance attributes are kept in a per-instance ``__dict__``."""

    def __init__(self, x, y):
        self.x, self.y = x, y
class WithSlots:
    """Same two fields as a regular class, but stored in fixed slots instead of a dict."""

    __slots__ = ('x', 'y')

    def __init__(self, x, y):
        self.x, self.y = x, y
d = WithDict(1, 2)
s = WithSlots(1, 2)

# Exact byte counts vary with the interpreter version and platform.
print(sys.getsizeof(d))        # ~48 bytes (object header)
print(sys.getsizeof(vars(d)))  # ~232 bytes (dict)
print(sys.getsizeof(s))        # ~56 bytes (object + 2 slots)
# s has no __dict__ at all

# Each slot name is exposed on the class as a descriptor
slot_type = type(WithSlots.x)
print(slot_type)               # <class 'member_descriptor'>
class Point:
    """2-D point restricted to the slot attributes ``x`` and ``y``."""

    __slots__ = ('x', 'y')

    def __init__(self, x, y):
        self.x, self.y = x, y
p = Point(1, 2)

# With __slots__ and no __dict__, names outside the slot list are rejected.
try:
    p.z = 3
except AttributeError as exc:
    print(exc)  # 'Point' object has no attribute 'z'
# A subclass that does not declare __slots__ gets a per-instance __dict__ again.
class Point3D(Point):
    """Point with an extra ``z`` coordinate kept in the instance ``__dict__``."""

    def __init__(self, x, y, z):
        super().__init__(x, y)
        # No slot exists for z in this subclass, so it lands in __dict__.
        self.z = z
# Slotted classes only support weak references if '__weakref__' is listed explicitly.
class Node:
    """Slotted node with ``value`` and ``next`` fields that can be weakly referenced."""

    __slots__ = ('value', 'next', '__weakref__')
Use __slots__ when you have many instances of a class and memory is a concern - for example, nodes in a large graph, rows in an in-memory dataset, or event objects. The memory saving is typically 40-50% per instance. Do not use it just for performance - measure first. @dataclass(slots=True) (Python 3.10+) generates __slots__ automatically.