Skip to content

Commit 695d20c

Browse files
authored
Instate msgspec type registration mechanism (#199)
* Instate msgspec type registration mechanism * Update ChangeSet API * address review
1 parent 8d40ad2 commit 695d20c

File tree

16 files changed

+155
-159
lines changed

16 files changed

+155
-159
lines changed

docs/reference/api/git.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,12 @@
3434
::: dda.utils.git.changeset.ChangeSet
3535
options:
3636
members:
37-
- changes
37+
- files
38+
- paths
3839
- added
3940
- modified
4041
- deleted
41-
- changed
4242
- digest
43-
- from_iter
4443
- from_patches
4544

4645
::: dda.utils.git.changeset.ChangedFile

src/dda/config/model/__init__.py

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from dda.config.model.tools import ToolsConfig
1616
from dda.config.model.update import UpdateConfig
1717
from dda.config.model.user import UserConfig
18-
from dda.utils.fs import Path
18+
from dda.types.hooks import dec_hook, enc_hook
1919

2020

2121
def _default_orgs() -> dict[str, OrgConfig]:
@@ -52,38 +52,3 @@ def get_default_toml_data() -> dict[str, Any]:
5252
builtin_types=(datetime.datetime, datetime.date, datetime.time),
5353
enc_hook=enc_hook,
5454
)
55-
56-
57-
def dec_hook(type: type[Any], obj: Any) -> Any: # noqa: A002
58-
if type is Path:
59-
return Path(obj)
60-
61-
from msgspec import convert
62-
63-
from dda.utils.git.changeset import ChangedFile, ChangeSet
64-
65-
if type is ChangeSet:
66-
# Since the dict decode logic from msgspec is not called here we have to manually decode the keys and values
67-
decoded_obj = {}
68-
for key, value in obj.items():
69-
decoded_key = dec_hook(Path, key)
70-
decoded_value = convert(value, ChangedFile, dec_hook=dec_hook)
71-
decoded_obj[decoded_key] = decoded_value
72-
return ChangeSet(changes=decoded_obj)
73-
74-
message = f"Cannot decode: {obj!r}"
75-
raise ValueError(message)
76-
77-
78-
def enc_hook(obj: Any) -> Any:
79-
if isinstance(obj, Path):
80-
return str(obj)
81-
82-
from dda.utils.git.changeset import ChangeSet
83-
84-
# Encode ChangeSet objects as dicts
85-
if isinstance(obj, ChangeSet):
86-
return dict(obj.changes)
87-
88-
message = f"Cannot encode: {obj!r}"
89-
raise NotImplementedError(message)

src/dda/types/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# SPDX-FileCopyrightText: 2025-present Datadog, Inc. <dev@datadoghq.com>
2+
#
3+
# SPDX-License-Identifier: MIT

src/dda/types/hooks.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# SPDX-FileCopyrightText: 2025-present Datadog, Inc. <dev@datadoghq.com>
2+
#
3+
# SPDX-License-Identifier: MIT
4+
from __future__ import annotations
5+
6+
from types import MappingProxyType
7+
from typing import TYPE_CHECKING, Any
8+
9+
from msgspec import Struct
10+
11+
if TYPE_CHECKING:
12+
from collections.abc import Callable
13+
14+
15+
class Hook(Struct, frozen=True):
"""Pair of callables registered for a single type: one to encode it, one to decode it."""
16+
# Called by enc_hook with the object to encode.
encode: Callable[[Any], Any]
17+
# Called by dec_hook with the raw value to decode.
decode: Callable[[Any], Any]
18+
19+
20+
def register_type_hooks(
    typ: type[Any],
    *,
    encode: Callable[[Any], Any],
    decode: Callable[[Any], Any],
) -> None:
    """Register the callables used to encode and decode values of ``typ``.

    Registering the same type again replaces the previously stored pair.
    """
    hook = Hook(encode=encode, decode=decode)
    __HOOKS[typ] = hook
27+
28+
29+
def enc_hook(obj: Any) -> Any:
    """Encode ``obj`` with the hook registered for its type.

    The lookup walks ``type(obj).__mro__``, so an instance of a subclass of a
    registered type is encoded with the hook of its nearest registered base
    class instead of being rejected because the exact type is not registered.

    Raises:
        NotImplementedError: if neither the type of ``obj`` nor any of its base
            classes has a registered hook.
    """
    for klass in type(obj).__mro__:
        if (registered_type := __HOOKS.get(klass)) is not None:
            return registered_type.encode(obj)

    message = f"Cannot encode: {obj!r}"
    raise NotImplementedError(message)
35+
36+
37+
def dec_hook(typ: type[Any], obj: Any) -> Any:
    """Decode ``obj`` with the hook registered for ``typ``.

    Raises:
        ValueError: if no hook is registered for ``typ``.
    """
    hook = __HOOKS.get(typ)
    if hook is None:
        message = f"Cannot decode: {obj!r}"
        raise ValueError(message)

    return hook.decode(obj)
43+
44+
45+
# Registry mapping a type to the Hook stored for it by register_type_hooks().
__HOOKS: dict[type[Any], Hook] = {}
46+
47+
# A mappingproxy is encoded by copying it into a dict and decoded by wrapping the value in MappingProxyType.
register_type_hooks(MappingProxyType, encode=dict, decode=MappingProxyType)

src/dda/utils/fs.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from functools import cached_property
1111
from typing import IO, TYPE_CHECKING, Any
1212

13+
from dda.types.hooks import register_type_hooks
14+
1315
if TYPE_CHECKING:
1416
from collections.abc import Generator
1517

@@ -189,3 +191,6 @@ def temp_file(suffix: str = "") -> Generator[Path, None, None]:
189191

190192
with NamedTemporaryFile(suffix=suffix) as f:
191193
yield Path(f.name).resolve()
194+
195+
196+
# Path values are encoded with str() and decoded by calling Path() on the raw value.
register_type_hooks(Path, encode=str, decode=Path)

src/dda/utils/git/changeset.py

Lines changed: 47 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,17 @@
66

77
from enum import StrEnum
88
from functools import cached_property
9+
from itertools import chain
910
from types import MappingProxyType
1011
from typing import TYPE_CHECKING, Self
1112

12-
from msgspec import Struct
13+
from msgspec import Struct, convert, to_builtins
1314

15+
from dda.types.hooks import dec_hook, enc_hook, register_type_hooks
1416
from dda.utils.fs import Path
1517

1618
if TYPE_CHECKING:
17-
from collections.abc import ItemsView, Iterable, Iterator, KeysView, ValuesView
19+
from collections.abc import Iterable
1820

1921

2022
class ChangeType(StrEnum):
@@ -58,78 +60,45 @@ class ChangeSet: # noqa: PLW1641
5860
When considering the changes to the working directory, the untracked files are considered as added files.
5961
"""
6062

61-
def __init__(self, changes: dict[Path, ChangedFile]) -> None:
62-
self.__changes = MappingProxyType(changes)
63+
def __init__(self, changed_files: Iterable[ChangedFile]) -> None:
    """Index the given changed files by the string form of their path.

    When several entries share the same path, the last one is kept.
    """
    by_path: dict[str, ChangedFile] = {}
    for changed_file in changed_files:
        by_path[str(changed_file.path)] = changed_file

    # Expose the index read-only and keep the de-duplicated files as a tuple.
    self.__changed = MappingProxyType(by_path)
    self.__files = tuple(by_path.values())
6366

6467
@property
65-
def changes(self) -> MappingProxyType[Path, ChangedFile]:
66-
return self.__changes
68+
def paths(self) -> MappingProxyType[str, ChangedFile]:
"""Read-only mapping from the string form of each path to its changed file."""
69+
return self.__changed
6770

68-
def keys(self) -> KeysView[Path]:
69-
return self.__changes.keys()
70-
71-
def values(self) -> ValuesView[ChangedFile]:
72-
return self.__changes.values()
73-
74-
def items(self) -> ItemsView[Path, ChangedFile]:
75-
return self.__changes.items()
76-
77-
def __getitem__(self, key: Path) -> ChangedFile:
78-
return self.__changes[key]
79-
80-
def __contains__(self, key: Path) -> bool:
81-
return key in self.__changes
82-
83-
def __len__(self) -> int:
84-
return len(self.__changes)
85-
86-
def __iter__(self) -> Iterator[Path]:
87-
return iter(self.__changes.keys())
88-
89-
def __or__(self, other: Self) -> Self:
90-
return self.from_iter(list(self.values()) + list(other.values()))
91-
92-
def __eq__(self, other: object) -> bool:
93-
return isinstance(other, ChangeSet) and self.__changes == other.__changes
94-
95-
@cached_property
96-
def added(self) -> set[Path]:
97-
"""List of files that were added."""
98-
return {change.path for change in self.values() if change.type == ChangeType.ADDED}
71+
@property
72+
def files(self) -> Iterable[ChangedFile]:
"""The changed files held by this changeset (stored as a tuple at construction)."""
73+
return self.__files
9974

100-
@cached_property
101-
def modified(self) -> set[Path]:
102-
"""List of files that were modified."""
103-
return {change.path for change in self.values() if change.type == ChangeType.MODIFIED}
75+
@property
def added(self) -> MappingProxyType[str, ChangedFile]:
    """Read-only mapping from path string to changed file, for the files that were added.

    The mapping is empty when the changeset contains no added file.
    """
    # The per-type index only has keys for change types that actually occur,
    # so fall back to an empty mapping instead of raising KeyError.
    return self.__change_types.get(ChangeType.ADDED, MappingProxyType({}))
10479

105-
@cached_property
106-
def deleted(self) -> set[Path]:
107-
"""List of files that were deleted."""
108-
return {change.path for change in self.values() if change.type == ChangeType.DELETED}
80+
@property
def modified(self) -> MappingProxyType[str, ChangedFile]:
    """Read-only mapping from path string to changed file, for the files that were modified.

    The mapping is empty when the changeset contains no modified file.
    """
    # The per-type index only has keys for change types that actually occur,
    # so fall back to an empty mapping instead of raising KeyError.
    return self.__change_types.get(ChangeType.MODIFIED, MappingProxyType({}))
10984

110-
@cached_property
111-
def changed(self) -> set[Path]:
112-
"""List of files that were changed (added, modified, or deleted)."""
113-
return set(self.keys())
85+
@property
def deleted(self) -> MappingProxyType[str, ChangedFile]:
    """Read-only mapping from path string to changed file, for the files that were deleted.

    The mapping is empty when the changeset contains no deleted file.
    """
    # The per-type index only has keys for change types that actually occur,
    # so fall back to an empty mapping instead of raising KeyError.
    return self.__change_types.get(ChangeType.DELETED, MappingProxyType({}))
11489

11590
def digest(self) -> str:
11691
"""Compute a SHA-256 hex digest of the changeset from each file's path, change type and patch."""
11792
from hashlib import sha256
11893

11994
digester = sha256()
120-
for change in sorted(self.values(), key=lambda x: x.path.as_posix()):
95+
# Files are fed to the hash in sorted path order, not in the order they were given.
for change in sorted(self.files, key=lambda cf: cf.path):
12196
digester.update(change.path.as_posix().encode())
12297
digester.update(change.type.value.encode())
12398
digester.update(change.patch.encode())
12499

125100
return str(digester.hexdigest())
126101

127-
@classmethod
128-
def from_iter(cls, data: Iterable[ChangedFile]) -> Self:
129-
"""Create a ChangeSet from an iterable of FileChanges."""
130-
items = {change.path: change for change in data}
131-
return cls(changes=items)
132-
133102
@classmethod
134103
def from_patches(cls, diff_output: str | list[str]) -> Self:
135104
"""
@@ -182,7 +151,21 @@ def from_patches(cls, diff_output: str | list[str]) -> Self:
182151
# Strip every "block" and add the missing separator
183152
patch = "" if binary else "\n".join([sep + block.strip() for block in blocks]).strip()
184153
changes.append(ChangedFile(path=current_file, type=current_type, binary=binary, patch=patch))
185-
return cls.from_iter(changes)
154+
return cls(changes)
155+
156+
def __or__(self, other: Self) -> Self:
    """Return a new changeset built from the files of ``self`` followed by those of ``other``."""
    combined = chain(self.files, other.files)
    changeset_cls = type(self)
    return changeset_cls(combined)
158+
159+
def __eq__(self, other: object) -> bool:
    """Compare equal to another ChangeSet whose path-to-file mapping is equal."""
    if not isinstance(other, ChangeSet):
        return False

    return self.paths == other.paths
161+
162+
@cached_property
def __change_types(self) -> dict[ChangeType, MappingProxyType[str, ChangedFile]]:
    """Group the changed files by change type, each group keyed by path string.

    Only change types that occur in the changeset get an entry.
    """
    grouped: dict[ChangeType, dict[str, ChangedFile]] = {}
    for changed_file in self.files:
        bucket = grouped.setdefault(changed_file.type, {})
        bucket[str(changed_file.path)] = changed_file

    # Wrap each group so callers receive read-only views.
    frozen: dict[ChangeType, MappingProxyType[str, ChangedFile]] = {}
    for kind, by_path in grouped.items():
        frozen[kind] = MappingProxyType(by_path)
    return frozen
186169

187170

188171
def _determine_change_type(before_filename: str, after_filename: str) -> ChangeType:
@@ -195,3 +178,10 @@ def _determine_change_type(before_filename: str, after_filename: str) -> ChangeT
195178

196179
msg = f"Unexpected file paths in git diff output: {before_filename} -> {after_filename} - this indicates a rename which we do not support"
197180
raise ValueError(msg)
181+
182+
183+
# A ChangeSet is encoded as the builtin form of its files; decoding converts each
# entry back to a ChangedFile and builds a ChangeSet from the results.
register_type_hooks(
184+
ChangeSet,
185+
encode=lambda obj: to_builtins(obj.files, enc_hook=enc_hook),
186+
decode=lambda obj: ChangeSet(convert(cf, ChangedFile, dec_hook=dec_hook) for cf in obj),
187+
)

src/dda/utils/git/github.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def get_commit_and_changes_from_github(remote: Remote, sha1: str) -> tuple[Commi
4444
data = client.get(get_commit_github_api_url(remote, sha1)).json()
4545

4646
# Compute ChangeSet
47-
changes = ChangeSet.from_iter(
47+
changes = ChangeSet(
4848
ChangedFile(
4949
path=Path(file_obj["filename"]),
5050
type=get_change_type_from_github_status(file_obj["status"]),
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,32 @@
1-
{
2-
"file2.txt": {
1+
[
2+
{
33
"path": "file2.txt",
44
"type": "D",
55
"binary": false,
66
"patch": "@@ -1,4 +0,0 @@\n-I am file2 !\n-I feel like I take up space for nothing...\n-I have a feeling like I won't exist pretty soon :/\n-"
77
},
8-
"file3.txt": {
8+
{
99
"path": "file3.txt",
1010
"type": "A",
1111
"binary": false,
1212
"patch": "@@ -0,0 +1,2 @@\n+I am file3 !\n+I'm new around here, hopefully everyone treats me nice :)"
1313
},
14-
"file4.txt": {
14+
{
1515
"path": "file4.txt",
1616
"type": "M",
1717
"binary": false,
1818
"patch": "@@ -2 +2 @@ I am file4.\n-People often tell me I am unreliable.\n+People often tell me I am THE BEST.\n@@ -4,3 +4,2 @@ Things like:\n-- You always change !\n-- I can never count on you...\n-- I didn't recognize you !\n+- You rock !\n+- I wish I were you !\n@@ -8 +7,3 @@ Do you think they have a point ?\n-I'd need to look at my own history to know...\n+Pah ! Who am I kidding, they're OBVIOUSLY RIGHT.\n+Arrogance ? What is that, an italian ice cream flavor ?\n+Get outta here !"
1919
},
20-
"file5.txt": {
20+
{
2121
"path": "file5.txt",
2222
"type": "D",
2323
"binary": false,
2424
"patch": "@@ -1,5 +0,0 @@\n-I am a humble file.\n-Soon I will change name.\n-I think I'll also take this as an opportunity to change myself.\n-New name, new me !\n-Or is that not how the saying goes ?"
2525
},
26-
"file5_new.txt": {
26+
{
2727
"path": "file5_new.txt",
2828
"type": "A",
2929
"binary": false,
3030
"patch": "@@ -0,0 +1,5 @@\n+I am a humble file.\n+Hey I have a new name !\n+Wow, I feel much better now.\n+New name, new me !\n+Or is that not how the saying goes ?"
3131
}
32-
}
32+
]
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
{
2-
"file2.txt": {
1+
[
2+
{
33
"path": "file2.txt",
44
"type": "A",
55
"binary": false,
66
"patch": "@@ -0,0 +1,3 @@\n+file2\n+I am a new file in the repo !\n+That's incredible."
77
}
8-
}
8+
]
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
{
2-
"file2.txt": {
1+
[
2+
{
33
"path": "file2.txt",
44
"type": "D",
55
"binary": false,
66
"patch": "@@ -1,3 +0,0 @@\n-file2\n-I will be deleted, unfortunately.\n-That's quite sad."
77
}
8-
}
8+
]

0 commit comments

Comments
 (0)