Skip to content

Commit

Permalink
Add missing docs in keylime.models.base
Browse files Browse the repository at this point in the history
Signed-off-by: Jean Snyman <git@jsnyman.com>
  • Loading branch information
stringlytyped committed Jun 19, 2024
1 parent 42fa62d commit 89474ee
Show file tree
Hide file tree
Showing 7 changed files with 429 additions and 9 deletions.
36 changes: 36 additions & 0 deletions keylime/models/base/associations.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@


class AssociatedRecordSet(set["BasicModel"]):
"""An AssociatedRecordSet contains a set of model instances (i.e., *records*) linked to a *parent record* by way of
an association established between two models. With a "to-many" association, the set can contain an unbounded number
of records but with a "to-one" association, the set will only ever contain one at most.
"""

def __init__(self, parent_record: "BasicModel", association: "ModelAssociation", *args: Any, **kwargs: Any) -> None:
self._parent_record = parent_record
self._association = association
Expand Down Expand Up @@ -52,6 +57,18 @@ def model(self) -> type["BasicModel"]:


class ModelAssociation(ABC):
"""A ModelAssociation represents a one-way association from one model to another. It cannot be instantiated directly
and should be inherited from and customised for the specific type of association.
As a Python descriptor [1], ModelAssociation allows associated records to be accessed from the parent record using
dot notation. However, because the __get__ and __set__ methods need to differ depending on the association type (a
"to-one" association should produce a single record whereas a "to-many" should return the whole
AssociatedRecordSet), these are left to be defined by the subclass. Even so, protected getters and setters are
provided by ModelAssociation to make these implementations as simple as possible and avoid duplication.
[1] https://docs.python.org/3/howto/descriptor.html
"""

def __init__(self, name: str, other_model: type["BasicModel"], inverse_of: Optional[str] = None) -> None:
self._name: str = name
self._private_member: str = "_" + name
Expand Down Expand Up @@ -128,6 +145,10 @@ def preload(self) -> bool:


class EntityAssociation(ModelAssociation):
"""EntityAssociation extends ModelAssociation to provide additional functionality common to associations which
map to a relationship between database entities.
"""

def __init__(
self,
name: str,
Expand Down Expand Up @@ -172,6 +193,11 @@ def other_model(self) -> type["PersistableModel"]:


class HasOneAssociation(EntityAssociation):
"""A HasOneAssociation is an association between database-backed models which allows a record to be linked to
one other. As this is achieved in the database engine with a foreign key in the associated record, it needs a
corresponding BelongsToAssociation in the associated model.
"""

def __get__(
self, parent_record: "BasicModel", _objtype: Optional[type["BasicModel"]] = None
) -> Union["PersistableModel", "HasOneAssociation", None]:
Expand All @@ -187,6 +213,11 @@ def _get_one(self, parent_record: "BasicModel") -> Union["PersistableModel", "Ha


class HasManyAssociation(EntityAssociation):
"""A HasManyAssociation is an association between database-backed models which allows a record to be linked to
a number of others. As this is achieved in the database engine with a foreign key in the associated records, it
needs a corresponding BelongsToAssociation in the associated model.
"""

def __get__(
self, parent_record: "BasicModel", _objtype: Optional[type["BasicModel"]] = None
) -> Union["AssociatedRecordSet", "HasManyAssociation", None]:
Expand All @@ -199,6 +230,11 @@ def _get_many(self, parent_record: "BasicModel") -> Union[AssociatedRecordSet, "


class BelongsToAssociation(EntityAssociation):
"""A BelongsToAssociation is the inverse of a HasOneAssociation or HasManyAssociation. Like a HasOneAssociation, it
links its parent record to, at most, one other record. In addition, it populates the parent record's foreign key
field whenever the associated record changes.
"""

def __init__(
self,
name: str,
Expand Down
178 changes: 172 additions & 6 deletions keylime/models/base/basic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class BasicModel(ABC, metaclass=BasicModelMeta):
If you need to persist a model to the database, you should subclass ``PersistableModel`` instead (which itself
inherits from ``BasicModel``).
Declaring a model and its schema
Declaring a Model and Its Schema
--------------------------------
To create a new model, declare a new class which inherits from ``BasicModel`` and implement the required ``_schema``
Expand All @@ -35,11 +35,177 @@ def _schema(cls):
cls._field("email", String, nullable=True)
# (Any additional fields...)
Fields are defined using the ``cls._field(...)`` helper method. Calls to this method must happen within the
``_schema`` method or a method called from ``_schema`` to ensure that they are invoked at the right point in the
model's lifecycle. The ``_field`` method takes a name, data type and an optional list of options. When subclassing
``BasicModel`` directly as in the example above, the only option accepted is ``nullable`` which controls whether
the field will accept empty values like ``None`` or ``""``.
Fields are defined using the ``cls._field(...)`` helper method. Calls to this method (or any other helper) must
happen within the ``_schema`` method to ensure that they are invoked at the right point in the model's lifecycle.
The ``_field`` method takes a name, data type and an optional list of options. When subclassing ``BasicModel``
directly as in the example above, the only option accepted is ``nullable`` which controls whether the field will
accept empty values like ``None`` or ``""``.
Schema helper methods mutate the model, causing various class members to be created or modified dynamically at time
of invocation. For instance, the "email" field declared in the example above causes a ``ModelField`` instance to be
added to the ``User`` class as a descriptor. This allows access of the field value via dot notation::
user = User.empty()
user.name = "John"
print(user.name) #=> "John"
Definitions of the helper methods can be found in the ``BasicModelMeta`` class.
Associations
------------
Models can be linked to other models so that records may contain references to other records. For the moment, no
associations can be declared on a model based on ``BasicModel`` but this will likely change in the future. See the
documentation for ``PersistableModel`` for associations which can be declared on database-backed models.
Mutating Records
----------------
As shown previously, it is possible to change a field by assigning it as if it were a property/attribute of the
record. Behind the scenes, this calls the ``record.change(...)`` method which can also be invoked directly. The new
value is saved in a dictionary of pending changes accessible from ``record.changes``.
For convenience, you may change several fields at once by calling ``record.cast_changes(data, fields)`` where
``data`` is a dictionary with the new values and ``fields`` is a list of field names to change. ``data`` may contain
any arbitrary data (even data which originates from outside the application) as only the fields explicitly listed in
``fields`` will be affected. This is illustrated below::
def change_user_profile(user, profile_data):
user.cast_changes(profile_data, ["name", "email", "phone"])
# ...
# Data received in HTTP request:
request_data = {
"name": "John",
"email": "jsmith@example.com",
"admin": True
}
change_user_profile(user, request_data)
# "admin" field has not been changed:
print(user.admin) #=> False
Pending changes can be accepted by calling ``record.commit_changes()``. This causes the values in ``record.changes``
to be moved to the ``record.committed`` dictionary. A common pattern is to queue up changes to several fields,
perform validation on the pending changes, and then commit them all in one go.
Accessing Field Values
----------------------
Reading a field value using dot notation will return any pending change for the field. If no pending change is
present in the record, this will fall back on the committed value for the field. Alternatively, you may access
values from ``record.values`` which uses this same behaviour. This is illustrated by the below example::
print(user.committed.get("name")) #=> "John"
print(user.changes.get("name")) #=> None
print(user.values.get("name")) #=> "John"
print(user.name) #=> "John"
user.change("name", "Jane")
print(user.committed.get("name")) #=> "John"
print(user.changes.get("name")) #=> "Jane"
print(user.values.get("name")) #=> "Jane"
print(user.name) #=> "Jane"
Data Validation
---------------
Pending changes can be checked to conform to the expected format by using the various data validation methods. For
example, calling ``record.validate_length("name", max=50)`` will check that the "name" field is no longer than 50
characters. If the check fails, an error will be recorded for that field.
When errors are present in a record, ``record.commit_changes()`` raises a ``FieldValueInvalid`` exception. You can
check whether there are any errors present for the record's pending changes by calling ``record.changes_valid``.
And you can get the dictionary of errors (organised by field) from ``record.errors``.
There are validation methods for various types of data and situations. However, you may need to perform your own
custom data validation. In such a case, you can call ``record._add_error(field, msg)`` where ``field`` is the name of the
field with the invalid change and ``msg`` is a short explanation of why the change is considered invalid. You should
expect ``msg`` to be returned to the API consumer in an HTTP response, so it should not contain sensitive
information about the internal state of the server.
NOTE: You may find it peculiar that validation rules are not defined within each field declaration, a common pattern
for data model libraries. This design is intentional as it provides more flexibility, allowing you to apply
validation rules conditionally depending on the circumstance or current state of the record. See the "Paradigms for
Good Model Design" section for details.
Rendering Records
-----------------
Calling ``record.render()`` produces a JSON-serialisable dictionary of the record's contents with field names mapped
to field values. When using this method to produce user-facing output, e.g., in the context of an HTTP response, it
is recommended that you pass in a list of allowable fields. When the method is used in this way, no field which
isn't explicitly listed will be included in the output. This helps ensure that no field containing sensitive data
(e.g., a password) is inadvertently output.
NOTE: You should do this even when the information in your model is entirely benign. You may not be able to predict
what fields will be added to the model in the future.
You may wish to override the render method to provide a sensible default for the list of allowable fields. That way,
a call to ``record.render()`` with no arguments will return those specific fields, rather than the entire record.
This saves on typing, reduces complexity in your controllers, and prevents users of your model from accidentally
leaking sensitive data. Here is an example of how you may achieve this::
def render(self, only=None):
if not only:
only = ["username", "bio"]
return super().render(only)
Paradigms for Good Model Design
-------------------------------
When creating a new model, you should avoid defining public methods, getters and setters or properties for
retrieving and mutating individual fields as these are already provided for you, being generated in response to the
schema you've defined. Because of this, you should not attempt to validate data at the point of being received by a
field, or try to prevent fields from being accessed or mutated from outside the model/record.
Instead, your methods should be concerned with managing the data lifecycle of the model. When data is changed as a
result of a particular event, your model should have a way of handling that specific scenario, including performing
the relevant data validation.
To illustrate this, imagine a ``User`` model for a typical web application. Records of this model are created when a
user registers and changed when a user edits their profile settings. The app also has a way of resetting a user's
password if forgotten. One way of handling all of this is to define different methods for each possibility, e.g.:
* ``User.register(data)``: a class method that creates and returns a new ``User`` object using the ``data`` received
from the registration form
* ``user.edit_profile(data)``: an instance method which changes the existing ``user`` using ``data`` received from
the profile edit form
* ``user.reset_password(data)``: an instance method which changes the existing ``user`` using ``data`` received
from the password reset form
Each of these methods would likely call ``cls.cast_changes(...)`` internally to only modify those fields which are
permitted to be changed in each circumstance. Then, they would perform validation of the incoming data (using the
various ``cls.validate...`` methods) as relevant. These methods may also set or initialise internal fields such as
the timestamp at which the user account is created.
Treating each scenario separately allows us to prevent changes to a user's date of birth and username after account
creation. The password field can be required during registration but optional when the user is editing their
profile. And we can prevent a password reset if the user has not confirmed their email address.
We may also vary how a given field is treated based on the value of another field. For instance, we could check that
a "confirm password" field matches the "password" field but only if the password is currently being changed,
allowing the user to leave these fields blank on the profile edit form if they are only changing other settings.
This is a technique that is used quite heavily in the ``RegistrarAgent`` model, for example, to require the presence
of a DevID in the absence of an EK cert and vice versa.
You should think about output in the same way. A user will appear differently in an admin interface than when shown
as a public profile, so it would make sense to provide multiple render functions, e.g., ``user.render_full()`` and
``user.render_public()``.
It is important to recognise that many models are simpler than our ``User`` example such that updating a record
looks mostly the same across all scenarios and the action of creating a record is not much different either. In
these cases, it may make sense to provide a single ``record.update(data)`` instance method and chain this with a
call to the built-in ``Model.empty()`` class method when you need to create new records::
record = Model.empty().update(data)
However, you should still think carefully about the data lifecycle of a "simple" model as you may need to manage how
parts of it transform over time, depending on the context of other changes. For example, when the EK or AK of a
``RegistrarAgent`` record changes, the "active" field is reset to false to require a repeat of the
TPM2_ActivateCredential process and cryptographically bind the AK to the EK.
"""

INST_ATTRS: tuple[str, ...] = ("_committed", "_changes", "_errors")
Expand Down
27 changes: 27 additions & 0 deletions keylime/models/base/basic_model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,33 @@


class BasicModelMeta(ABCMeta):
"""BasicModelMeta is used to dynamically build model classes based on schema definitions. All model classes which
inherit from either ``BasicModel`` or ``PersistableModel`` are themselves instances of BasicModelMeta. The meta
class contains various methods (many of them private/protected) for transforming models. Instance members of the
meta class become class members of the model, so any method not marked as a ``@staticmethod`` or ``@classmethod`` is
accessible directly on the model class.
Schema Helpers
--------------
The ``BasicModelMeta`` class provides helper methods (macros) for declaring a model's schema. These are explained in
the documentation for ``BasicModel``.
The Lifecycle of a Model Class
------------------------------
When a new model is created by inheriting from ``BasicModel`` (or a subclass), ``BasicModelMeta.__new__(...)``
executes, creating and initialising a number of class members on the model. At this point, no fields or associations
have been created.
To cause the declarations in the model's schema to be interpreted, a call to the ``Model.process_schema()`` method
provided by ``BasicModelMeta`` is required. This method does not usually need to be invoked manually as it should
be called whenever the first class or instance property/attribute is accessed. It causes the ``Model._schema``
method, and thus any schema helpers, to be invoked. The schema helpers create the fields, associations and other
class members. Afterward, ``Model.process_schema()`` disables the schema helpers so that they cannot be used to
modify the model after this point.
"""

# pylint: disable=bad-staticmethod-argument, no-value-for-parameter, using-constant-test

DeclaredFieldType: TypeAlias = Union[ModelType, TypeEngine, type[ModelType], type[TypeEngine]]
Expand Down
15 changes: 13 additions & 2 deletions keylime/models/base/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@


class ModelField:
"""ModelField is used to represent fields in a model. As a Python descriptor [1], when instantiated and assigned to
a class member, it can be accessed from instances of that class as if it were a property [2] of the instance. This
makes it possible for a model field to be accessed using dot notation (e.g., ``record.field = 123``) even though its
data is stored within a private instance variable.
Typically ModelField is not instantiated outside the ``keylime.models.base`` package.
[1] https://docs.python.org/3/howto/descriptor.html
[2] https://docs.python.org/3/library/functions.html#property
"""

DeclaredFieldType: TypeAlias = Union[ModelType, TypeEngine, type[ModelType], type[TypeEngine]]

FIELD_NAME_REGEX = re.compile(r"^[A-Za-z_]+[A-Za-z0-9_]*$")
Expand All @@ -37,8 +48,8 @@ def __init__(self, name: str, data_type: DeclaredFieldType, nullable: bool = Fal
self._data_type = ModelType(data_type)
else:
raise FieldDefinitionInvalid(
f"field '{name}' cannot be defined with type '{data_type}' as this is neither a ModelType subclass/instance "
f"nor a SQLAlchemy data type inheriting from 'sqlalchemy.types.TypeEngine'"
f"field '{name}' cannot be defined with type '{data_type}' as this is neither a ModelType "
f"subclass/instance nor a SQLAlchemy data type inheriting from 'sqlalchemy.types.TypeEngine'"
)

def __get__(self, obj: Optional["BasicModel"], _objtype: Optional[type["BasicModel"]] = None) -> Any:
Expand Down
Loading

0 comments on commit 89474ee

Please sign in to comment.