@dataclass Basics
Apply @dataclass to a class with type-annotated fields and Python generates the standard dunder methods for you:
from dataclasses import dataclass
@dataclass
class Point:
    """2-D point; @dataclass generates __init__, __repr__ and __eq__ from the fields."""

    x: float
    y: float


# Generated __init__
p = Point(1.0, 2.0)
print(p)  # Point(x=1.0, y=2.0) <- generated __repr__
print(p.x, p.y)  # 1.0 2.0

# Generated __eq__ compares instances field by field
p1 = Point(1.0, 2.0)
p2 = Point(1.0, 2.0)
print(p1 == p2)  # True
# Equivalent manual class (what @dataclass replaces)
class PointManual:
    """Hand-written counterpart of a two-field dataclass.

    Because __eq__ is defined without __hash__, instances are unhashable,
    which is also what a default (non-frozen) dataclass gives you.
    """

    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y

    def __repr__(self):
        return f'PointManual(x={self.x!r}, y={self.y!r})'

    def __eq__(self, other):
        # The generated dataclass __eq__ requires both operands to be of the
        # exact same class (not merely isinstance-compatible), so a subclass
        # instance never compares equal to a base-class instance.
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (self.x, self.y) == (other.x, other.y)
Fields with defaults must come after fields without defaults - same rule as function parameters:
from dataclasses import dataclass, field
@dataclass
class Config:
    """Settings object in which every field has a default value."""

    host: str = 'localhost'
    port: int = 8080
    debug: bool = False
    # WRONG - would raise ValueError:
    # tags: list = []  # mutable default not allowed
    # default_factory is called once per instance, so each Config gets its own list.
    tags: list = field(default_factory=list)


c = Config()
print(c)  # Config(host='localhost', port=8080, debug=False, tags=[])

c2 = Config(host='example.com', port=443, debug=True)
c2.tags.append('prod')
print(c2)  # Config(host='example.com', port=443, debug=True, tags=['prod'])
Control what @dataclass generates with keyword arguments:
from dataclasses import dataclass
@dataclass(order=True, frozen=True)
class Version:
    """Immutable semantic version, ordered as the tuple (major, minor, patch)."""

    major: int
    minor: int
    patch: int


v1 = Version(1, 2, 3)
v2 = Version(2, 0, 0)
print(v1 < v2)  # True <- generated __lt__ (order=True)
print(hash(v1))  # works <- generated __hash__ (frozen=True)

# @dataclass options:
#   init=True          generate __init__ (default True)
#   repr=True          generate __repr__ (default True)
#   eq=True            generate __eq__ (default True)
#   order=False        generate comparison methods (default False)
#   frozen=False       make immutable (default False)
#   unsafe_hash=False  force __hash__ generation
#   slots=False        generate __slots__ (default False, Python 3.10+)
#   kw_only=False      all fields keyword-only (Python 3.10+)
field() Options
field() customises individual field behaviour:
from dataclasses import dataclass, field
import uuid
@dataclass
class Order:
    """Order record demonstrating the per-field options accepted by field()."""

    # default_factory: callable called with no args for each instance
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    items: list = field(default_factory=list)
    metadata: dict = field(default_factory=dict)
    # repr=False: exclude from __repr__ output
    _secret: str = field(default='', repr=False)
    # compare=False: exclude from __eq__ and ordering
    created_at: str = field(default='', compare=False)
    # init=False: not included in __init__, set in __post_init__
    total: float = field(default=0.0, init=False)
    # hash=False: exclude from __hash__
    # (repr is left at its default of True, so cache still shows up in repr)
    cache: dict = field(default_factory=dict, hash=False, compare=False)


o = Order()
print(o.id)  # unique UUID
print(o)  # Order(id='...', items=[], metadata={}, created_at='', total=0.0, cache={})
__post_init__ and InitVar
__post_init__ runs after __init__ completes - use it for derived fields and validation:
from dataclasses import dataclass, field
@dataclass
class Rectangle:
    """Rectangle whose area is derived from width and height after construction.

    Raises:
        ValueError: if width or height is not strictly positive.
    """

    width: float
    height: float
    area: float = field(init=False)  # computed, not passed to __init__

    def __post_init__(self):
        # Validate before deriving anything, so an invalid rectangle never
        # ends up with an area attribute.
        if self.width <= 0 or self.height <= 0:
            raise ValueError('dimensions must be positive')
        self.area = self.width * self.height


r = Rectangle(3.0, 4.0)
print(r.area)  # 12.0
# Rectangle(0, 1) raises ValueError
InitVar declares a constructor-only parameter that is not stored as a field:
from dataclasses import dataclass, field, InitVar
import hashlib
@dataclass
class User:
    """Account record that keeps a SHA-256 digest instead of the raw password."""

    name: str
    password: InitVar[str]  # passed to __init__ but not stored as field
    email: str = ''
    _hash: str = field(init=False, repr=False)

    def __post_init__(self, password: str):
        # password is available here but not stored.
        # NOTE: a bare, unsalted SHA-256 keeps the example short; real password
        # storage should use a salted KDF such as hashlib.scrypt or
        # hashlib.pbkdf2_hmac.
        self._hash = hashlib.sha256(password.encode()).hexdigest()

    def check_password(self, password: str) -> bool:
        """Return True if *password* hashes to the stored digest."""
        import hmac

        candidate = hashlib.sha256(password.encode()).hexdigest()
        # Constant-time comparison, so the check does not leak how many
        # leading characters of the digest matched.
        return hmac.compare_digest(self._hash, candidate)


u = User(name='Alice', password='secret123')
print(u)  # User(name='Alice', email='') - no password!
print(u.check_password('secret123'))  # True
frozen=True (Immutable)
Frozen dataclasses are immutable value objects - with the default eq=True they also get a generated __hash__, and any attempt to assign or delete an attribute raises FrozenInstanceError:
from dataclasses import dataclass, replace
@dataclass(frozen=True)
class Colour:
    """Immutable RGB colour; each channel must lie in the range 0-255.

    Raises:
        ValueError: if any channel is outside 0-255.
    """

    r: int
    g: int
    b: int

    def __post_init__(self):
        # Reading attributes is fine on a frozen instance; only assignment
        # and deletion are blocked.
        for field_name in ('r', 'g', 'b'):
            if not 0 <= getattr(self, field_name) <= 255:
                raise ValueError(f'{field_name} must be 0-255')

    def blend(self, other: 'Colour', ratio: float = 0.5) -> 'Colour':
        """Return a new Colour mixing self and *other*.

        ratio is the weight given to *other* (0 keeps self, 1 gives other);
        each channel is truncated to an int.
        """
        # A frozen instance cannot be mutated, so build a new Colour instead.
        return Colour(
            r=int(self.r * (1 - ratio) + other.r * ratio),
            g=int(self.g * (1 - ratio) + other.g * ratio),
            b=int(self.b * (1 - ratio) + other.b * ratio),
        )


red = Colour(255, 0, 0)
blue = Colour(0, 0, 255)
purple = red.blend(blue)
print(purple)  # Colour(r=127, g=0, b=127)

# Hashable - usable as dict key
palette = {red: 'red', blue: 'blue'}

# Immutable - this raises FrozenInstanceError
from dataclasses import FrozenInstanceError

try:
    red.r = 100
except FrozenInstanceError as e:
    print(type(e).__name__)  # FrozenInstanceError
Use dataclasses.replace() to create a modified copy:
from dataclasses import dataclass, replace
@dataclass(frozen=True)
class Config:
    """Immutable settings object; derive variants with dataclasses.replace()."""

    host: str = 'localhost'
    port: int = 8080
    debug: bool = False


prod = Config(host='example.com', port=443)

# Create a modified copy for staging: replace() builds a new instance with
# the named fields overridden and every other field copied from prod.
staging = replace(prod, host='staging.example.com', debug=True)
print(staging)  # Config(host='staging.example.com', port=443, debug=True)
print(prod)  # Config(host='example.com', port=443, debug=False) - unchanged
Inheritance
Dataclass inheritance works naturally. Child class fields come after parent fields in __init__:
from dataclasses import dataclass, field
@dataclass
class Animal:
    """Base record; its fields come first in every subclass __init__."""

    name: str
    species: str


@dataclass
class Pet(Animal):
    """Animal with an owner; the child fields are appended after the parent's."""

    owner: str = ''
    tricks: list = field(default_factory=list)


p = Pet(name='Buddy', species='Canis lupus', owner='Alice')
print(p)
# Pet(name='Buddy', species='Canis lupus', owner='Alice', tricks=[])
If a parent dataclass has a field with a default, the child cannot have fields without defaults - they would appear after fields with defaults in __init__, which Python disallows. Fix: use field(default=...) for all child fields, or use kw_only=True (Python 3.10+) on the child class to make all child fields keyword-only, bypassing the ordering restriction.
from dataclasses import dataclass, field
@dataclass
class Base:
name: str
value: int = 0 # has a default
# Without kw_only, this would raise TypeError:
# class Child(Base): extra: str # no default - comes after a default field
@dataclass(kw_only=True) # all Child fields are keyword-only
class Child(Base):
extra: str # no default - ok because kw_only
c = Child(name='test', extra='hello')
print(c)
slots=True (Python 3.10+)
Combine @dataclass with slots=True for reduced memory usage and faster attribute access - ideal for many small instances:
import sys
from dataclasses import dataclass
@dataclass
class PointRegular:
x: float
y: float
@dataclass(slots=True)
class PointSlotted:
x: float
y: float
r = PointRegular(1.0, 2.0)
s = PointSlotted(1.0, 2.0)
print(sys.getsizeof(r)) # ~48 bytes
print(sys.getsizeof(r.__dict__)) # ~232 bytes
print(sys.getsizeof(s)) # ~56 bytes (no __dict__)
# 1 million instances
import tracemalloc

tracemalloc.start()

# get_traced_memory() returns (current, peak), where "current" is the total of
# ALL live traced allocations. points_regular is still alive while the slotted
# list is built, so each measurement must be taken as a difference against the
# reading just before it; otherwise the slotted figure includes the regular one.
baseline, _ = tracemalloc.get_traced_memory()

points_regular = [PointRegular(float(i), float(i)) for i in range(1_000_000)]
after_regular, _ = tracemalloc.get_traced_memory()
size_regular = after_regular - baseline

points_slotted = [PointSlotted(float(i), float(i)) for i in range(1_000_000)]
after_slotted, _ = tracemalloc.get_traced_memory()
size_slotted = after_slotted - after_regular

print(f'regular: {size_regular/1e6:.1f} MB')
print(f'slotted: {size_slotted/1e6:.1f} MB')
Dataclass vs NamedTuple
| Feature | @dataclass | NamedTuple |
|---|---|---|
| Mutable by default | Yes | No (always immutable) |
| Hashable | With frozen=True (or unsafe_hash=True, or eq=False) | Yes (if all fields are hashable) |
| Tuple unpacking | No | Yes |
| Index access | No | Yes (p[0]) |
| Inheritance | Yes | Limited |
| Methods | Yes | Yes |
| slots | slots=True (3.10+) | Implicit (is a tuple) |
| Memory | Slightly higher | Tuple-efficient |
| dict/JSON serialisation | dataclasses.asdict() | ._asdict() |
from dataclasses import dataclass, asdict
from typing import NamedTuple
# NamedTuple - tuple semantics + named fields
class PointNT(NamedTuple):
    """Immutable 2-D point that is also a real tuple."""

    x: float
    y: float


p = PointNT(1.0, 2.0)
x, y = p  # tuple unpacking
print(p[0])  # index access
print(hash(p))  # hashable
# @dataclass - mutable, more flexible
@dataclass
class PointDC:
    """Mutable 2-D point; not a tuple, so no unpacking or index access."""

    x: float
    y: float


p = PointDC(1.0, 2.0)
p.x = 5.0  # mutable
d = asdict(p)  # {'x': 5.0, 'y': 2.0}

# When to use which:
#   NamedTuple: lightweight records, tuple compatibility needed, always immutable
#   @dataclass: mutable objects, complex defaults, validation in __post_init__,
#               inheritance, or you need frozen=True selectively
dataclasses.asdict(obj) recursively converts a dataclass to a dict (including nested dataclasses, lists, tuples, and dicts). Combine with json.dumps(asdict(obj)) for easy JSON serialisation, provided every field value is itself JSON-serialisable. For deserialisation, use MyClass(**data_dict) for flat dataclasses, or a library like dacite or cattrs for nested types.