11.3. Dataclass Postinit

11.3.1. Rationale

  • Dataclasses generate __init__()

  • Overloading __init__() manually will destroy it

  • For init time validation there is __post_init__()

  • It is run after all parameters are set in the class

  • Hence you have to take care about negative cases (errors)

11.3.2. Initial Validation in Classes

>>> class Astronaut:
...
...     def __init__(self, firstname, lastname, age):
...         self.firstname = firstname
...         self.lastname = lastname
...         if 30 <= age < 50:
...             self.age = age
...         else:
...             raise ValueError('Age is out of range')
...
>>>
>>>
>>> astro = Astronaut('Mark', 'Watney', age=44)
>>> vars(astro)
{'firstname': 'Mark', 'lastname': 'Watney', 'age': 44}
>>>
>>> astro = Astronaut('Mark', 'Watney', age=60)
Traceback (most recent call last):
ValueError: Age is out of range
>>> from typing import Final
>>>
>>>
>>> class Astronaut:
...     firstname: str
...     lastname: str
...     age: int
...     AGE_MIN: Final[int] = 30
...     AGE_MAX: Final[int] = 50
...
...     def __init__(self, firstname, lastname, age):
...         self.firstname = firstname
...         self.lastname = lastname
...
...         if not self.AGE_MIN <= age < self.AGE_MAX:
...             raise ValueError('Age is out of range')
...         else:
...             self.age = age
>>>
>>>
>>> astro = Astronaut('Mark', 'Watney', age=44)
>>> vars(astro)
{'firstname': 'Mark', 'lastname': 'Watney', 'age': 44}
>>>
>>> Astronaut('Mark', 'Watney', age=60)
Traceback (most recent call last):
ValueError: Age is out of range

11.3.3. Initial Validation in Dataclasses

>>> from dataclasses import dataclass
>>> from typing import Final
>>>
>>>
>>> @dataclass
... class Astronaut:
...     firstname: str
...     lastname: str
...     age: int
...     AGE_MIN: Final[int] = 30
...     AGE_MAX: Final[int] = 50
...
...     def __post_init__(self):
...         if not self.AGE_MIN <= self.age < self.AGE_MAX:
...             raise ValueError('Age is out of range')
>>>
>>>
>>> Astronaut('Mark', 'Watney', age=44)
Astronaut(firstname='Mark', lastname='Watney', age=44, AGE_MIN=30, AGE_MAX=50)
>>>
>>> Astronaut('Mark', 'Watney', age=60)
Traceback (most recent call last):
ValueError: Age is out of range

11.3.4. Date and Time Conversion

>>> from dataclasses import dataclass
>>> from datetime import date
>>>
>>>
>>> @dataclass
... class Astronaut:
...     firstname: str
...     lastname: str
...     born: date
...
...     def __post_init__(self):
...         self.born = date.fromisoformat(self.born)
>>>
>>>
>>> Astronaut('Mark', 'Watney', '1961-04-12')  
Astronaut(firstname='Mark', lastname='Watney',
          born=datetime.date(1961, 4, 12))
>>> from dataclasses import dataclass
>>> from datetime import datetime
>>> from typing import Optional
>>>
>>>
>>> @dataclass
... class Astronaut:
...     firstname: str
...     lastname: str
...     launch: Optional[datetime] = None
...
...     def __post_init__(self):
...         if self.launch is not None:
...             self.launch = datetime.fromisoformat(self.launch)
>>>
>>>
>>> Astronaut('Mark', 'Watney')
Astronaut(firstname='Mark', lastname='Watney', launch=None)
>>>
>>> Astronaut('Mark', 'Watney', '1969-07-21T02:56:15+00:00')  
Astronaut(firstname='Mark', lastname='Watney',
          launch=datetime.datetime(1969, 7, 21, 2, 56, 15, tzinfo=datetime.timezone.utc))

11.3.5. InitVar

  • Init-only fields are added as parameters to the generated __init__ method, and are passed to the optional __post_init__ method

  • They are not otherwise used by Data Classes

>>> from dataclasses import dataclass, InitVar, field
>>>
>>>
>>> @dataclass
... class Astronaut:
...     fullname: InitVar[str] = None
...     firstname: str = field(init=False, default=None)
...     lastname: str = field(init=False, default=None)
...
...     def __post_init__(self, fullname: str):
...         self.firstname, self.lastname = fullname.split()
>>>
>>>
>>> astro = Astronaut('Mark Watney')
>>>
>>> astro
Astronaut(firstname='Mark', lastname='Watney')
>>>
>>> vars(astro)
{'firstname': 'Mark', 'lastname': 'Watney'}

11.3.6. ClassVar

One of two places where dataclass() actually inspects the type of a field is to determine if a field is a class variable as defined in PEP 526. It does this by checking if the type of the field is typing.ClassVar. If a field is a ClassVar, it is excluded from consideration as a field and is ignored by the dataclass mechanisms. Such ClassVar pseudo-fields are not returned by the module-level fields() function.

>>> from typing import ClassVar
>>> from dataclasses import dataclass
>>>
>>>
>>> @dataclass
... class Astronaut:
...     fullname: str
...     firstname: str
...     age: int
...     AGE_MIN: ClassVar[int] = 30
...     AGE_MAX: ClassVar[int] = 50

11.3.7. Use Case - Boundary check

>>> class Point:
...     def __init__(self, x, y):
...         if x < 0:
...             raise ValueError('Coordinate cannot be negative')
...         else:
...             self.x = x
...
...         if y < 0:
...             raise ValueError('Coordinate cannot be negative')
...         else:
...             self.y = y
>>> from dataclasses import dataclass
>>>
>>>
>>> @dataclass
... class Point:
...     x: int = 0
...     y: int = 0
...
...     def __post_init__(self):
...         if self.x < 0 or self.y < 0:
...             raise ValueError('Coordinate cannot be negative')

11.3.8. Use Case - Var Range

>>> from dataclasses import dataclass, field
>>> from typing import Final
>>>
>>>
>>> @dataclass
... class Point:
...     x: int = 0
...     y: int = 0
...     X_MIN: Final[int] = 0
...     X_MAX: Final[int] = 1024
...     Y_MIN: Final[int] = 0
...     Y_MAX: Final[int] = 768
...
...     def __post_init__(self):
...         if not self.X_MIN <= self.x < self.X_MAX:
...             raise ValueError(f'x value ({self.x}) is not between {self.X_MIN} and {self.X_MAX}')
...         if not self.Y_MIN <= self.y < self.Y_MAX:
...             raise ValueError(f'y value ({self.y}) is not between {self.Y_MIN} and {self.Y_MAX}')
>>>
>>>
>>> Point(0, 0)
Point(x=0, y=0, X_MIN=0, X_MAX=1024, Y_MIN=0, Y_MAX=768)
>>>
>>> Point(-1, 0)
Traceback (most recent call last):
ValueError: x value (-1) is not between 0 and 1024
>>>
>>> Point(0, 2000)
Traceback (most recent call last):
ValueError: y value (2000) is not between 0 and 768
>>>
>>> Point(0, 0, X_MIN=10, X_MAX=100)
Traceback (most recent call last):
ValueError: x value (0) is not between 10 and 100

11.3.9. Use Case - Const Range

>>> from dataclasses import dataclass, field
>>> from typing import Final
>>>
>>>
>>> @dataclass
... class Point:
...     x: int = 0
...     y: int = 0
...     X_MIN: Final[int] = field(init=False, default=0)
...     X_MAX: Final[int] = field(init=False, default=1024)
...     Y_MIN: Final[int] = field(init=False, default=0)
...     Y_MAX: Final[int] = field(init=False, default=768)
...
...     def __post_init__(self):
...         if not self.X_MIN <= self.x < self.X_MAX:
...             raise ValueError(f'x value ({self.x}) is not between {self.X_MIN} and {self.X_MAX}')
...         if not self.Y_MIN <= self.y < self.Y_MAX:
...             raise ValueError(f'y value ({self.y}) is not between {self.Y_MIN} and {self.Y_MAX}')
>>>
>>>
>>> Point(0, 0)
Point(x=0, y=0, X_MIN=0, X_MAX=1024, Y_MIN=0, Y_MAX=768)
>>>
>>> Point(0, 0, X_MIN=10, X_MAX=100)
Traceback (most recent call last):
TypeError: __init__() got an unexpected keyword argument 'X_MIN'

11.3.10. Use Case - Init, Repr

>>> from dataclasses import dataclass, field
>>> from typing import Final
>>>
>>>
>>> @dataclass
... class Point:
...     x: int = 0
...     y: int = 0
...     X_MIN: Final[int] = field(init=False, repr=False, default=0)
...     X_MAX: Final[int] = field(init=False, repr=False, default=1024)
...     Y_MIN: Final[int] = field(init=False, repr=False, default=0)
...     Y_MAX: Final[int] = field(init=False, repr=False, default=768)
...
...     def __post_init__(self):
...         if not self.X_MIN <= self.x < self.X_MAX:
...             raise ValueError(f'x value ({self.x}) is not between {self.X_MIN} and {self.X_MAX}')
...         if not self.Y_MIN <= self.y < self.Y_MAX:
...             raise ValueError(f'y value ({self.y}) is not between {self.Y_MIN} and {self.Y_MAX}')
>>>
>>>
>>> Point(0, 0)
Point(x=0, y=0)
>>>
>>> Point(-1, 0)
Traceback (most recent call last):
ValueError: x value (-1) is not between 0 and 1024
>>>
>>> Point(0, -1)
Traceback (most recent call last):
ValueError: y value (-1) is not between 0 and 768

11.3.11. Assignments

Code 11.18. Solution
"""
* Assignment: Dataclass Define Syntax
* Complexity: easy
* Lines of code: 7 lines
* Time: 5 min

English:
    1. Use Dataclass to define class `Point` with attributes:
        a. `x: int` with default value `0`
        b. `y: int` with default value `0`
    2. When `x` or `y` has negative value raise en exception `ValueError('Coordinate cannot be negative')`
    3. Use `datalass` and validation in `__post_init__()`
    4. Run doctests - all must succeed

Polish:
    1. Użyj Dataclass do zdefiniowania klasy `Point` z atrybutami:
        a. `x: int` z domyślną wartością `0`
        b. `y: int` z domyślną wartością `0`
    2. Gdy `x` lub `y` mają wartość ujemną podnieś wyjątek `ValueError('Coordinate cannot be negative')`
    3. Użyj `datalass` i walidacji w `__post_init__()`
    4. Uruchom doctesty - wszystkie muszą się powieść

Tests:
    >>> import sys; sys.tracebacklimit = 0
    >>> from inspect import isclass

    >>> assert isclass(Point)
    >>> assert hasattr(Point, 'x')
    >>> assert hasattr(Point, 'y')

    >>> Point()
    Point(x=0, y=0)

    >>> Point(x=0, y=0)
    Point(x=0, y=0)

    >>> Point(x=1, y=2)
    Point(x=1, y=2)

    >>> Point(x=-1, y=0)
    Traceback (most recent call last):
    ValueError: Coordinate cannot be negative

    >>> Point(x=0, y=-1)
    Traceback (most recent call last):
    ValueError: Coordinate cannot be negative
"""

from dataclasses import dataclass


Code 11.19. Solution
"""
* Assignment: Dataclass Define JSON
* Complexity: medium
* Lines of code: 23 lines
* Time: 21 min

English:
    1. You received input data in JSON format from the API
        a. `str` fields: firstname, lastname, role, username, password, email,
        b. `datetime` fields: born, last_login,
        c. `bool` fields: is_active, is_staff, is_superuser,
        d. `list[dict]` field: user_permissions
    2. Using `dataclass` model data as class `User`
        a. Note, that fields order is important for tests to pass
    3. Parse fields with dates and store as `datetime` objects
    4. Parse fields with `true` and `false` values and store as `bool` objects
    5. Do not create additional classes to represent `permission` filed, leave it as `list[dict]`
    6. Iterate over records and create instances of this class
    7. Collect all instances to `result: list[User]`
    8. Run doctests - all must succeed

Polish:
    1. Otrzymałeś z API dane wejściowe w formacie JSON
        a. pola `str`: firstname, lastname, role, username, password, email,
        b. pola `datetime`: born, last_login,
        c. pola `bool`: is_active, is_staff, is_superuser,
        d. pola `list[dict]`: user_permissions
    2. Wykorzystując `dataclass` zamodeluj dane za pomocą klasy `User`
        a. Zwróć uwagę, że kolejność pól ma znaczenie aby testy przechodziły
    3. Sparsuj pola zwierające daty i zapisz je jako obiekty `datetime`
    4. Sparsuj pola zawierające `true` lub `false` i zapamiętaj ich wartości jako obiekty `bool`
    5. Nie twórz dodatkowych klas do reprezentacji pola `permission`, niech zostanie jako `list[dict]`
    6. Iterując po rekordach twórz instancje tej klasy
    7. Zbierz wszystkie instancje do `result: list[User]`
    8. Uruchom doctesty - wszystkie muszą się powieść

Tests:
    >>> import sys; sys.tracebacklimit = 0

    >>> result  # doctest: +NORMALIZE_WHITESPACE
    [User(firstname='Melissa', lastname='Lewis', role='commander', username='mlewis', password='pbkdf2_sha256$120000$gvEBNiCeTrYa0$5C+NiCeTrYsha1PHogqvXNiCeTrY0CRSLYYAA90=', email='melissa.lewis@nasa.gov', born=datetime.date(1995, 7, 15), last_login=datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), is_active=True, is_staff=True, is_superuser=False, user_permissions=[{'eclss': ['add', 'modify', 'view']}, {'communication': ['add', 'modify', 'view']}, {'medical': ['add', 'modify', 'view']}, {'science': ['add', 'modify', 'view']}]),
     User(firstname='Rick', lastname='Martinez', role='pilot', username='rmartinez', password='pbkdf2_sha256$120000$aXNiCeTrY$UfCJrBh/qhXohNiCeTrYH8nsdANiCeTrYnShs9M/c=', email='rick.martinez@ansa.gov', born=datetime.date(1996, 1, 21), last_login=None, is_active=True, is_staff=True, is_superuser=False, user_permissions=[{'communication': ['add', 'view']}, {'eclss': ['add', 'modify', 'view']}, {'science': ['add', 'modify', 'view']}]),
     User(firstname='Alex', lastname='Vogel', role='chemist', username='avogel', password='pbkdf2_sha256$120000$eUNiCeTrYHoh$X32NiCeTrYZOWFdBcVT1l3NiCeTrY4WJVhr+cKg=', email='alex.vogel@esa.int', born=datetime.date(1994, 11, 15), last_login=None, is_active=True, is_staff=True, is_superuser=False, user_permissions=[{'eclss': ['add', 'modify', 'view']}, {'communication': ['add', 'modify', 'view']}, {'medical': ['add', 'modify', 'view']}, {'science': ['add', 'modify', 'view']}]),
     User(firstname='Chris', lastname='Beck', role='crew-medical-officer', username='cbeck', password='pbkdf2_sha256$120000$3G0RNiCeTrYlaV1$mVb62WNiCeTrYQ9aYzTsSh74NiCeTrY2+c9/M=', email='chris.beck@nasa.gov', born=datetime.date(1999, 8, 2), last_login=datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), is_active=True, is_staff=True, is_superuser=False, user_permissions=[{'communication': ['add', 'view']}, {'medical': ['add', 'modify', 'view']}, {'science': ['add', 'modify', 'view']}]),
     User(firstname='Beth', lastname='Johansen', role='sysop', username='bjohansen', password='pbkdf2_sha256$120000$QmSNiCeTrYBv$Nt1jhVyacNiCeTrYSuKzJ//WdyjlNiCeTrYYZ3sB1r0g=', email='', born=datetime.date(2006, 5, 9), last_login=None, is_active=True, is_staff=True, is_superuser=False, user_permissions=[{'communication': ['add', 'view']}, {'science': ['add', 'modify', 'view']}]),
     User(firstname='Mark', lastname='Watney', role='botanist', username='mwatney', password='pbkdf2_sha256$120000$bxS4dNiCeTrY1n$Y8NiCeTrYRMa5bNJhTFjNiCeTrYp5swZni2RQbs=', email='', born=datetime.date(1994, 10, 12), last_login=None, is_active=True, is_staff=True, is_superuser=False, user_permissions=[{'communication': ['add', 'modify', 'view']}, {'science': ['add', 'modify', 'view']}])]
"""

import json
from dataclasses import dataclass
from datetime import date, datetime, timezone
from typing import Optional, Union

DATA = '[{"model":"authorization.user","pk":1,"fields":{"firstname":"Melissa","lastname":"Lewis","role":"commander","username":"mlewis","password":"pbkdf2_sha256$120000$gvEBNiCeTrYa0$5C+NiCeTrYsha1PHogqvXNiCeTrY0CRSLYYAA90=","email":"melissa.lewis@nasa.gov","born":"1995-07-15","last_login":"1970-01-01T00:00:00.000+00:00","is_active":true,"is_staff":true,"is_superuser":false,"user_permissions":[{"eclss":["add","modify","view"]},{"communication":["add","modify","view"]},{"medical":["add","modify","view"]},{"science":["add","modify","view"]}]}},{"model":"authorization.user","pk":2,"fields":{"firstname":"Rick","lastname":"Martinez","role":"pilot","username":"rmartinez","password":"pbkdf2_sha256$120000$aXNiCeTrY$UfCJrBh/qhXohNiCeTrYH8nsdANiCeTrYnShs9M/c=","born":"1996-01-21","last_login":null,"email":"rick.martinez@ansa.gov","is_active":true,"is_staff":true,"is_superuser":false,"user_permissions":[{"communication":["add","view"]},{"eclss":["add","modify","view"]},{"science":["add","modify","view"]}]}},{"model":"authorization.user","pk":3,"fields":{"firstname":"Alex","lastname":"Vogel","role":"chemist","username":"avogel","password":"pbkdf2_sha256$120000$eUNiCeTrYHoh$X32NiCeTrYZOWFdBcVT1l3NiCeTrY4WJVhr+cKg=","email":"alex.vogel@esa.int","born":"1994-11-15","last_login":null,"is_active":true,"is_staff":true,"is_superuser":false,"user_permissions":[{"eclss":["add","modify","view"]},{"communication":["add","modify","view"]},{"medical":["add","modify","view"]},{"science":["add","modify","view"]}]}},{"model":"authorization.user","pk":4,"fields":{"firstname":"Chris","lastname":"Beck","role":"crew-medical-officer","username":"cbeck","password":"pbkdf2_sha256$120000$3G0RNiCeTrYlaV1$mVb62WNiCeTrYQ9aYzTsSh74NiCeTrY2+c9/M=","email":"chris.beck@nasa.gov","born":"1999-08-02","last_login":"1970-01-01T00:00:00.000+00:00","is_active":true,"is_staff":true,"is_superuser":false,"user_permissions":[{"communication":["add","view"]},{"medical":["add","modify","view"]},{"science":["add","modify","view"]}]}},{"model":"authorization.user","pk":5,"fields":{"firstname":"Beth","lastname":"Johansen","role":"sysop","username":"bjohansen","password":"pbkdf2_sha256$120000$QmSNiCeTrYBv$Nt1jhVyacNiCeTrYSuKzJ//WdyjlNiCeTrYYZ3sB1r0g=","email":"","born":"2006-05-09","last_login":null,"is_active":true,"is_staff":true,"is_superuser":false,"user_permissions":[{"communication":["add","view"]},{"science":["add","modify","view"]}]}},{"model":"authorization.user","pk":6,"fields":{"firstname":"Mark","lastname":"Watney","role":"botanist","username":"mwatney","password":"pbkdf2_sha256$120000$bxS4dNiCeTrY1n$Y8NiCeTrYRMa5bNJhTFjNiCeTrYp5swZni2RQbs=","email":"","born":"1994-10-12","last_login":null,"is_active":true,"is_staff":true,"is_superuser":false,"user_permissions":[{"communication":["add","modify","view"]},{"science":["add","modify","view"]}]}}]'  # noqa