-
Notifications
You must be signed in to change notification settings - Fork 2
Instate msgspec type registration mechanism #199
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# SPDX-FileCopyrightText: 2025-present Datadog, Inc. <dev@datadoghq.com> | ||
# | ||
# SPDX-License-Identifier: MIT |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# SPDX-FileCopyrightText: 2025-present Datadog, Inc. <dev@datadoghq.com> | ||
# | ||
# SPDX-License-Identifier: MIT | ||
from __future__ import annotations | ||
|
||
from types import MappingProxyType | ||
from typing import TYPE_CHECKING, Any | ||
|
||
from msgspec import Struct | ||
|
||
if TYPE_CHECKING: | ||
from collections.abc import Callable | ||
|
||
|
||
class Hook(Struct, frozen=True): | ||
encode: Callable[[Any], Any] | ||
decode: Callable[[Any], Any] | ||
|
||
|
||
def register_type_hooks( | ||
typ: type[Any], | ||
*, | ||
encode: Callable[[Any], Any], | ||
decode: Callable[[Any], Any], | ||
) -> None: | ||
__HOOKS[typ] = Hook(encode=encode, decode=decode) | ||
|
||
|
||
def enc_hook(obj: Any) -> Any: | ||
if (registered_type := __HOOKS.get(type(obj))) is not None: | ||
return registered_type.encode(obj) | ||
|
||
message = f"Cannot encode: {obj!r}" | ||
raise NotImplementedError(message) | ||
|
||
|
||
def dec_hook(typ: type[Any], obj: Any) -> Any: | ||
if (registered_type := __HOOKS.get(typ)) is not None: | ||
return registered_type.decode(obj) | ||
|
||
message = f"Cannot decode: {obj!r}" | ||
raise ValueError(message) | ||
|
||
|
||
__HOOKS: dict[type[Any], Hook] = {} | ||
|
||
register_type_hooks(MappingProxyType, encode=dict, decode=MappingProxyType) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,15 +6,17 @@ | |
|
||
from enum import StrEnum | ||
from functools import cached_property | ||
from itertools import chain | ||
from types import MappingProxyType | ||
from typing import TYPE_CHECKING, Self | ||
|
||
from msgspec import Struct | ||
from msgspec import Struct, convert, to_builtins | ||
|
||
from dda.types.hooks import dec_hook, enc_hook, register_type_hooks | ||
from dda.utils.fs import Path | ||
|
||
if TYPE_CHECKING: | ||
from collections.abc import ItemsView, Iterable, Iterator, KeysView, ValuesView | ||
from collections.abc import Iterable | ||
|
||
|
||
class ChangeType(StrEnum): | ||
|
@@ -58,78 +60,45 @@ class ChangeSet: # noqa: PLW1641 | |
When considering the changes to the working directory, the untracked files are considered as added files. | ||
""" | ||
|
||
def __init__(self, changes: dict[Path, ChangedFile]) -> None: | ||
self.__changes = MappingProxyType(changes) | ||
def __init__(self, changed_files: Iterable[ChangedFile]) -> None: | ||
self.__changed = MappingProxyType({str(c.path): c for c in changed_files}) | ||
self.__files = tuple(self.__changed.values()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is to maintain the order. |
||
|
||
@property | ||
def changes(self) -> MappingProxyType[Path, ChangedFile]: | ||
return self.__changes | ||
def paths(self) -> MappingProxyType[str, ChangedFile]: | ||
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This provides the previous interface of all changes being represented by a mapping of relative paths to the A benefit to this is that callers possessing |
||
return self.__changed | ||
|
||
def keys(self) -> KeysView[Path]: | ||
return self.__changes.keys() | ||
|
||
def values(self) -> ValuesView[ChangedFile]: | ||
return self.__changes.values() | ||
|
||
def items(self) -> ItemsView[Path, ChangedFile]: | ||
return self.__changes.items() | ||
|
||
def __getitem__(self, key: Path) -> ChangedFile: | ||
return self.__changes[key] | ||
|
||
def __contains__(self, key: Path) -> bool: | ||
return key in self.__changes | ||
|
||
def __len__(self) -> int: | ||
return len(self.__changes) | ||
|
||
def __iter__(self) -> Iterator[Path]: | ||
return iter(self.__changes.keys()) | ||
|
||
def __or__(self, other: Self) -> Self: | ||
return self.from_iter(list(self.values()) + list(other.values())) | ||
|
||
def __eq__(self, other: object) -> bool: | ||
return isinstance(other, ChangeSet) and self.__changes == other.__changes | ||
|
||
@cached_property | ||
def added(self) -> set[Path]: | ||
"""List of files that were added.""" | ||
return {change.path for change in self.values() if change.type == ChangeType.ADDED} | ||
@property | ||
def files(self) -> Iterable[ChangedFile]: | ||
return self.__files | ||
|
||
@cached_property | ||
def modified(self) -> set[Path]: | ||
"""List of files that were modified.""" | ||
return {change.path for change in self.values() if change.type == ChangeType.MODIFIED} | ||
@property | ||
def added(self) -> MappingProxyType[str, ChangedFile]: | ||
"""Set of files that were added.""" | ||
return self.__change_types[ChangeType.ADDED] | ||
|
||
@cached_property | ||
def deleted(self) -> set[Path]: | ||
"""List of files that were deleted.""" | ||
return {change.path for change in self.values() if change.type == ChangeType.DELETED} | ||
@property | ||
def modified(self) -> MappingProxyType[str, ChangedFile]: | ||
"""Set of files that were modified.""" | ||
return self.__change_types[ChangeType.MODIFIED] | ||
Comment on lines
+80
to
+83
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we should convert our keys to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If so then they can always |
||
|
||
@cached_property | ||
def changed(self) -> set[Path]: | ||
"""List of files that were changed (added, modified, or deleted).""" | ||
return set(self.keys()) | ||
@property | ||
def deleted(self) -> MappingProxyType[str, ChangedFile]: | ||
"""Set of files that were deleted.""" | ||
return self.__change_types[ChangeType.DELETED] | ||
|
||
def digest(self) -> str: | ||
"""Compute a hash of the changeset.""" | ||
from hashlib import sha256 | ||
|
||
digester = sha256() | ||
for change in sorted(self.values(), key=lambda x: x.path.as_posix()): | ||
for change in sorted(self.files, key=lambda cf: cf.path): | ||
digester.update(change.path.as_posix().encode()) | ||
digester.update(change.type.value.encode()) | ||
digester.update(change.patch.encode()) | ||
|
||
return str(digester.hexdigest()) | ||
|
||
@classmethod | ||
def from_iter(cls, data: Iterable[ChangedFile]) -> Self: | ||
"""Create a ChangeSet from an iterable of FileChanges.""" | ||
items = {change.path: change for change in data} | ||
return cls(changes=items) | ||
|
||
@classmethod | ||
def from_patches(cls, diff_output: str | list[str]) -> Self: | ||
""" | ||
|
@@ -182,7 +151,21 @@ def from_patches(cls, diff_output: str | list[str]) -> Self: | |
# Strip every "block" and add the missing separator | ||
patch = "" if binary else "\n".join([sep + block.strip() for block in blocks]).strip() | ||
changes.append(ChangedFile(path=current_file, type=current_type, binary=binary, patch=patch)) | ||
return cls.from_iter(changes) | ||
return cls(changes) | ||
|
||
def __or__(self, other: Self) -> Self: | ||
return type(self)(chain(self.files, other.files)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I couldn't really find a way to satisfy this other than having a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems very clean to me like this anyway, good trick ! |
||
|
||
def __eq__(self, other: object) -> bool: | ||
return isinstance(other, ChangeSet) and self.paths == other.paths | ||
|
||
@cached_property | ||
def __change_types(self) -> dict[ChangeType, MappingProxyType[str, ChangedFile]]: | ||
changes: dict[ChangeType, dict[str, ChangedFile]] = {} | ||
for change in self.files: | ||
changes.setdefault(change.type, {})[str(change.path)] = change | ||
|
||
return {change_type: MappingProxyType(paths) for change_type, paths in changes.items()} | ||
|
||
|
||
def _determine_change_type(before_filename: str, after_filename: str) -> ChangeType: | ||
|
@@ -195,3 +178,10 @@ def _determine_change_type(before_filename: str, after_filename: str) -> ChangeT | |
|
||
msg = f"Unexpected file paths in git diff output: {before_filename} -> {after_filename} - this indicates a rename which we do not support" | ||
raise ValueError(msg) | ||
|
||
|
||
register_type_hooks( | ||
ChangeSet, | ||
encode=lambda obj: to_builtins(obj.files, enc_hook=enc_hook), | ||
decode=lambda obj: ChangeSet(convert(cf, ChangedFile, dec_hook=dec_hook) for cf in obj), | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,32 @@ | ||
{ | ||
"file2.txt": { | ||
[ | ||
{ | ||
"path": "file2.txt", | ||
"type": "D", | ||
"binary": false, | ||
"patch": "@@ -1,4 +0,0 @@\n-I am file2 !\n-I feel like I take up space for nothing...\n-I have a feeling like I won't exist pretty soon :/\n-" | ||
}, | ||
"file3.txt": { | ||
{ | ||
"path": "file3.txt", | ||
"type": "A", | ||
"binary": false, | ||
"patch": "@@ -0,0 +1,2 @@\n+I am file3 !\n+I'm new around here, hopefully everyone treats me nice :)" | ||
}, | ||
"file4.txt": { | ||
{ | ||
"path": "file4.txt", | ||
"type": "M", | ||
"binary": false, | ||
"patch": "@@ -2 +2 @@ I am file4.\n-People often tell me I am unreliable.\n+People often tell me I am THE BEST.\n@@ -4,3 +4,2 @@ Things like:\n-- You always change !\n-- I can never count on you...\n-- I didn't recognize you !\n+- You rock !\n+- I wish I were you !\n@@ -8 +7,3 @@ Do you think they have a point ?\n-I'd need to look at my own history to know...\n+Pah ! Who am I kidding, they're OBVIOUSLY RIGHT.\n+Arrogance ? What is that, an italian ice cream flavor ?\n+Get outta here !" | ||
}, | ||
"file5.txt": { | ||
{ | ||
"path": "file5.txt", | ||
"type": "D", | ||
"binary": false, | ||
"patch": "@@ -1,5 +0,0 @@\n-I am a humble file.\n-Soon I will change name.\n-I think I'll also take this as an opportunity to change myself.\n-New name, new me !\n-Or is that not how the saying goes ?" | ||
}, | ||
"file5_new.txt": { | ||
{ | ||
"path": "file5_new.txt", | ||
"type": "A", | ||
"binary": false, | ||
"patch": "@@ -0,0 +1,5 @@\n+I am a humble file.\n+Hey I have a new name !\n+Wow, I feel much better now.\n+New name, new me !\n+Or is that not how the saying goes ?" | ||
} | ||
} | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
{ | ||
"file2.txt": { | ||
[ | ||
{ | ||
"path": "file2.txt", | ||
"type": "A", | ||
"binary": false, | ||
"patch": "@@ -0,0 +1,3 @@\n+file2\n+I am a new file in the repo !\n+That's incredible." | ||
} | ||
} | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
{ | ||
"file2.txt": { | ||
[ | ||
{ | ||
"path": "file2.txt", | ||
"type": "D", | ||
"binary": false, | ||
"patch": "@@ -1,3 +0,0 @@\n-file2\n-I will be deleted, unfortunately.\n-That's quite sad." | ||
} | ||
} | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,32 @@ | ||
{ | ||
"added_lines.txt": { | ||
[ | ||
{ | ||
"path": "added_lines.txt", | ||
"type": "M", | ||
"binary": false, | ||
"patch": "@@ -2,0 +3,4 @@ I wonder what that could mean ?\n+\n+But of course !\n+I get some added lines.\n+That makes sense." | ||
}, | ||
"added_lines_middle.txt": { | ||
{ | ||
"path": "added_lines_middle.txt", | ||
"type": "M", | ||
"binary": false, | ||
"patch": "@@ -2,0 +3,2 @@ I have a bit more text than added_lines.\n+Nobody expects the Spanish Inquisition !\n+My developer really wonders if cracking jokes in test data is against company policy." | ||
}, | ||
"deleted_lines.txt": { | ||
{ | ||
"path": "deleted_lines.txt", | ||
"type": "M", | ||
"binary": false, | ||
"patch": "@@ -2,2 +2 @@ Hmm, my name is deleted_lines.\n-I wonder what that could mean ?\n-I don't want to be shortened, that does not seem fun.\n+" | ||
}, | ||
"deleted_lines_middle.txt": { | ||
{ | ||
"path": "deleted_lines_middle.txt", | ||
"type": "M", | ||
"binary": false, | ||
"patch": "@@ -2,3 +1,0 @@ Hmm, my name is deleted_lines_middle.\n-I wonder what that could mean ?\n-I don't want to be shortened, that does not seem fun.\n-Thinking about it, being short is also kind of nice though." | ||
}, | ||
"edited_lines.txt": { | ||
{ | ||
"path": "edited_lines.txt", | ||
"type": "M", | ||
"binary": false, | ||
"patch": "@@ -4,8 +4,5 @@ I have a big block of text here:\n-Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n-Vestibulum ornare cursus diam, quis hendrerit nibh.\n-Donec sollicitudin neque in tempus ornare.\n-Integer sit amet pretium quam.\n-Maecenas lacinia augue id est malesuada, vitae fermentum justo faucibus.\n-Aenean posuere nisi tincidunt nisi pharetra blandit.\n-Integer sed nulla sed eros aliquet eleifend quis ac quam.\n-Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.\n+What kind of dev uses lorem ipsum ?\n+Keyboard mashing is where it's at !\n+Check this out:\n+phq w4q3t'p£tfnjklifewqkpjnoq23bjikkijq4ovikobjqv4kioblj;\n+vqpewkvnkqknbpjlo[iqervkb[jplofqvwer[kpjlqfp[okqolp[f;vn]]]]]" | ||
} | ||
} | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This makes it so the caller can directly instantiate the class without first forming a mapping which also simplifies the disk storage format so there is no longer a redundant top-level key.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good, definitely makes it easier to use 👍