Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions docs/reference/api/git.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,12 @@
::: dda.utils.git.changeset.ChangeSet
options:
members:
- changes
- files
- paths
- added
- modified
- deleted
- changed
- digest
- from_iter
- from_patches

::: dda.utils.git.changeset.ChangedFile
Expand Down
37 changes: 1 addition & 36 deletions src/dda/config/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from dda.config.model.tools import ToolsConfig
from dda.config.model.update import UpdateConfig
from dda.config.model.user import UserConfig
from dda.utils.fs import Path
from dda.types.hooks import dec_hook, enc_hook


def _default_orgs() -> dict[str, OrgConfig]:
Expand Down Expand Up @@ -52,38 +52,3 @@ def get_default_toml_data() -> dict[str, Any]:
builtin_types=(datetime.datetime, datetime.date, datetime.time),
enc_hook=enc_hook,
)


def dec_hook(type: type[Any], obj: Any) -> Any: # noqa: A002
if type is Path:
return Path(obj)

from msgspec import convert

from dda.utils.git.changeset import ChangedFile, ChangeSet

if type is ChangeSet:
# Since the dict decode logic from msgspec is not called here we have to manually decode the keys and values
decoded_obj = {}
for key, value in obj.items():
decoded_key = dec_hook(Path, key)
decoded_value = convert(value, ChangedFile, dec_hook=dec_hook)
decoded_obj[decoded_key] = decoded_value
return ChangeSet(changes=decoded_obj)

message = f"Cannot decode: {obj!r}"
raise ValueError(message)


def enc_hook(obj: Any) -> Any:
if isinstance(obj, Path):
return str(obj)

from dda.utils.git.changeset import ChangeSet

# Encode ChangeSet objects as dicts
if isinstance(obj, ChangeSet):
return dict(obj.changes)

message = f"Cannot encode: {obj!r}"
raise NotImplementedError(message)
3 changes: 3 additions & 0 deletions src/dda/types/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# SPDX-FileCopyrightText: 2025-present Datadog, Inc. <dev@datadoghq.com>
#
# SPDX-License-Identifier: MIT
47 changes: 47 additions & 0 deletions src/dda/types/hooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# SPDX-FileCopyrightText: 2025-present Datadog, Inc. <dev@datadoghq.com>
#
# SPDX-License-Identifier: MIT
from __future__ import annotations

from types import MappingProxyType
from typing import TYPE_CHECKING, Any

from msgspec import Struct

if TYPE_CHECKING:
from collections.abc import Callable


class Hook(Struct, frozen=True):
encode: Callable[[Any], Any]
decode: Callable[[Any], Any]


def register_type_hooks(
typ: type[Any],
*,
encode: Callable[[Any], Any],
decode: Callable[[Any], Any],
) -> None:
__HOOKS[typ] = Hook(encode=encode, decode=decode)


def enc_hook(obj: Any) -> Any:
if (registered_type := __HOOKS.get(type(obj))) is not None:
return registered_type.encode(obj)

message = f"Cannot encode: {obj!r}"
raise NotImplementedError(message)


def dec_hook(typ: type[Any], obj: Any) -> Any:
if (registered_type := __HOOKS.get(typ)) is not None:
return registered_type.decode(obj)

message = f"Cannot decode: {obj!r}"
raise ValueError(message)


__HOOKS: dict[type[Any], Hook] = {}

register_type_hooks(MappingProxyType, encode=dict, decode=MappingProxyType)
5 changes: 5 additions & 0 deletions src/dda/utils/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from functools import cached_property
from typing import IO, TYPE_CHECKING, Any

from dda.types.hooks import register_type_hooks

if TYPE_CHECKING:
from collections.abc import Generator

Expand Down Expand Up @@ -189,3 +191,6 @@ def temp_file(suffix: str = "") -> Generator[Path, None, None]:

with NamedTemporaryFile(suffix=suffix) as f:
yield Path(f.name).resolve()


register_type_hooks(Path, encode=str, decode=Path)
104 changes: 47 additions & 57 deletions src/dda/utils/git/changeset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@

from enum import StrEnum
from functools import cached_property
from itertools import chain
from types import MappingProxyType
from typing import TYPE_CHECKING, Self

from msgspec import Struct
from msgspec import Struct, convert, to_builtins

from dda.types.hooks import dec_hook, enc_hook, register_type_hooks
from dda.utils.fs import Path

if TYPE_CHECKING:
from collections.abc import ItemsView, Iterable, Iterator, KeysView, ValuesView
from collections.abc import Iterable


class ChangeType(StrEnum):
Expand Down Expand Up @@ -58,78 +60,45 @@ class ChangeSet: # noqa: PLW1641
When considering the changes to the working directory, the untracked files are considered as added files.
"""

def __init__(self, changes: dict[Path, ChangedFile]) -> None:
self.__changes = MappingProxyType(changes)
def __init__(self, changed_files: Iterable[ChangedFile]) -> None:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes it so the caller can directly instantiate the class without first forming a mapping which also simplifies the disk storage format so there is no longer a redundant top-level key.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, definitely makes it easier to use 👍

self.__changed = MappingProxyType({str(c.path): c for c in changed_files})
self.__files = tuple(self.__changed.values())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why a tuple instead of a set ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to maintain the order.


@property
def changes(self) -> MappingProxyType[Path, ChangedFile]:
return self.__changes
def paths(self) -> MappingProxyType[str, ChangedFile]:
Copy link
Contributor Author

@ofek ofek Oct 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This provides the previous interface of all changes being represented by a mapping of relative paths to the ChangedFile class and removes the requirement for pass-through dictionary methods. The keys are now strings because msgspec actually doesn't support any other types for dictionary keys which is why the decoding hook had to manually coerce the type.

A benefit to this is that callers possessing str instances no longer have to import the Path class for membership checks.

return self.__changed

def keys(self) -> KeysView[Path]:
return self.__changes.keys()

def values(self) -> ValuesView[ChangedFile]:
return self.__changes.values()

def items(self) -> ItemsView[Path, ChangedFile]:
return self.__changes.items()

def __getitem__(self, key: Path) -> ChangedFile:
return self.__changes[key]

def __contains__(self, key: Path) -> bool:
return key in self.__changes

def __len__(self) -> int:
return len(self.__changes)

def __iter__(self) -> Iterator[Path]:
return iter(self.__changes.keys())

def __or__(self, other: Self) -> Self:
return self.from_iter(list(self.values()) + list(other.values()))

def __eq__(self, other: object) -> bool:
return isinstance(other, ChangeSet) and self.__changes == other.__changes

@cached_property
def added(self) -> set[Path]:
"""List of files that were added."""
return {change.path for change in self.values() if change.type == ChangeType.ADDED}
@property
def files(self) -> Iterable[ChangedFile]:
return self.__files

@cached_property
def modified(self) -> set[Path]:
"""List of files that were modified."""
return {change.path for change in self.values() if change.type == ChangeType.MODIFIED}
@property
def added(self) -> MappingProxyType[str, ChangedFile]:
"""Set of files that were added."""
return self.__change_types[ChangeType.ADDED]

@cached_property
def deleted(self) -> set[Path]:
"""List of files that were deleted."""
return {change.path for change in self.values() if change.type == ChangeType.DELETED}
@property
def modified(self) -> MappingProxyType[str, ChangedFile]:
"""Set of files that were modified."""
return self.__change_types[ChangeType.MODIFIED]
Comment on lines +80 to +83
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should convert our keys to Path objects within these methods, if the expectation is that our callers will mostly be using Paths instead of raw strs

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If so then they can always str(p) whereas membership checks for callers that only have str objects would require an import.


@cached_property
def changed(self) -> set[Path]:
"""List of files that were changed (added, modified, or deleted)."""
return set(self.keys())
@property
def deleted(self) -> MappingProxyType[str, ChangedFile]:
"""Set of files that were deleted."""
return self.__change_types[ChangeType.DELETED]

def digest(self) -> str:
"""Compute a hash of the changeset."""
from hashlib import sha256

digester = sha256()
for change in sorted(self.values(), key=lambda x: x.path.as_posix()):
for change in sorted(self.files, key=lambda cf: cf.path):
digester.update(change.path.as_posix().encode())
digester.update(change.type.value.encode())
digester.update(change.patch.encode())

return str(digester.hexdigest())

@classmethod
def from_iter(cls, data: Iterable[ChangedFile]) -> Self:
"""Create a ChangeSet from an iterable of FileChanges."""
items = {change.path: change for change in data}
return cls(changes=items)

@classmethod
def from_patches(cls, diff_output: str | list[str]) -> Self:
"""
Expand Down Expand Up @@ -182,7 +151,21 @@ def from_patches(cls, diff_output: str | list[str]) -> Self:
# Strip every "block" and add the missing separator
patch = "" if binary else "\n".join([sep + block.strip() for block in blocks]).strip()
changes.append(ChangedFile(path=current_file, type=current_type, binary=binary, patch=patch))
return cls.from_iter(changes)
return cls(changes)

def __or__(self, other: Self) -> Self:
return type(self)(chain(self.files, other.files))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't really find a way to satisfy this other than having a classmethod (like the removed from_iter) that returns an instance of cls.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems very clean to me like this anyway, good trick !


def __eq__(self, other: object) -> bool:
return isinstance(other, ChangeSet) and self.paths == other.paths

@cached_property
def __change_types(self) -> dict[ChangeType, MappingProxyType[str, ChangedFile]]:
changes: dict[ChangeType, dict[str, ChangedFile]] = {}
for change in self.files:
changes.setdefault(change.type, {})[str(change.path)] = change

return {change_type: MappingProxyType(paths) for change_type, paths in changes.items()}


def _determine_change_type(before_filename: str, after_filename: str) -> ChangeType:
Expand All @@ -195,3 +178,10 @@ def _determine_change_type(before_filename: str, after_filename: str) -> ChangeT

msg = f"Unexpected file paths in git diff output: {before_filename} -> {after_filename} - this indicates a rename which we do not support"
raise ValueError(msg)


register_type_hooks(
ChangeSet,
encode=lambda obj: to_builtins(obj.files, enc_hook=enc_hook),
decode=lambda obj: ChangeSet(convert(cf, ChangedFile, dec_hook=dec_hook) for cf in obj),
)
2 changes: 1 addition & 1 deletion src/dda/utils/git/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def get_commit_and_changes_from_github(remote: Remote, sha1: str) -> tuple[Commi
data = client.get(get_commit_github_api_url(remote, sha1)).json()

# Compute ChangeSet
changes = ChangeSet.from_iter(
changes = ChangeSet(
ChangedFile(
path=Path(file_obj["filename"]),
type=get_change_type_from_github_status(file_obj["status"]),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
{
"file2.txt": {
[
{
"path": "file2.txt",
"type": "D",
"binary": false,
"patch": "@@ -1,4 +0,0 @@\n-I am file2 !\n-I feel like I take up space for nothing...\n-I have a feeling like I won't exist pretty soon :/\n-"
},
"file3.txt": {
{
"path": "file3.txt",
"type": "A",
"binary": false,
"patch": "@@ -0,0 +1,2 @@\n+I am file3 !\n+I'm new around here, hopefully everyone treats me nice :)"
},
"file4.txt": {
{
"path": "file4.txt",
"type": "M",
"binary": false,
"patch": "@@ -2 +2 @@ I am file4.\n-People often tell me I am unreliable.\n+People often tell me I am THE BEST.\n@@ -4,3 +4,2 @@ Things like:\n-- You always change !\n-- I can never count on you...\n-- I didn't recognize you !\n+- You rock !\n+- I wish I were you !\n@@ -8 +7,3 @@ Do you think they have a point ?\n-I'd need to look at my own history to know...\n+Pah ! Who am I kidding, they're OBVIOUSLY RIGHT.\n+Arrogance ? What is that, an italian ice cream flavor ?\n+Get outta here !"
},
"file5.txt": {
{
"path": "file5.txt",
"type": "D",
"binary": false,
"patch": "@@ -1,5 +0,0 @@\n-I am a humble file.\n-Soon I will change name.\n-I think I'll also take this as an opportunity to change myself.\n-New name, new me !\n-Or is that not how the saying goes ?"
},
"file5_new.txt": {
{
"path": "file5_new.txt",
"type": "A",
"binary": false,
"patch": "@@ -0,0 +1,5 @@\n+I am a humble file.\n+Hey I have a new name !\n+Wow, I feel much better now.\n+New name, new me !\n+Or is that not how the saying goes ?"
}
}
]
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"file2.txt": {
[
{
"path": "file2.txt",
"type": "A",
"binary": false,
"patch": "@@ -0,0 +1,3 @@\n+file2\n+I am a new file in the repo !\n+That's incredible."
}
}
]
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"file2.txt": {
[
{
"path": "file2.txt",
"type": "D",
"binary": false,
"patch": "@@ -1,3 +0,0 @@\n-file2\n-I will be deleted, unfortunately.\n-That's quite sad."
}
}
]
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
{
"added_lines.txt": {
[
{
"path": "added_lines.txt",
"type": "M",
"binary": false,
"patch": "@@ -2,0 +3,4 @@ I wonder what that could mean ?\n+\n+But of course !\n+I get some added lines.\n+That makes sense."
},
"added_lines_middle.txt": {
{
"path": "added_lines_middle.txt",
"type": "M",
"binary": false,
"patch": "@@ -2,0 +3,2 @@ I have a bit more text than added_lines.\n+Nobody expects the Spanish Inquisition !\n+My developer really wonders if cracking jokes in test data is against company policy."
},
"deleted_lines.txt": {
{
"path": "deleted_lines.txt",
"type": "M",
"binary": false,
"patch": "@@ -2,2 +2 @@ Hmm, my name is deleted_lines.\n-I wonder what that could mean ?\n-I don't want to be shortened, that does not seem fun.\n+"
},
"deleted_lines_middle.txt": {
{
"path": "deleted_lines_middle.txt",
"type": "M",
"binary": false,
"patch": "@@ -2,3 +1,0 @@ Hmm, my name is deleted_lines_middle.\n-I wonder what that could mean ?\n-I don't want to be shortened, that does not seem fun.\n-Thinking about it, being short is also kind of nice though."
},
"edited_lines.txt": {
{
"path": "edited_lines.txt",
"type": "M",
"binary": false,
"patch": "@@ -4,8 +4,5 @@ I have a big block of text here:\n-Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n-Vestibulum ornare cursus diam, quis hendrerit nibh.\n-Donec sollicitudin neque in tempus ornare.\n-Integer sit amet pretium quam.\n-Maecenas lacinia augue id est malesuada, vitae fermentum justo faucibus.\n-Aenean posuere nisi tincidunt nisi pharetra blandit.\n-Integer sed nulla sed eros aliquet eleifend quis ac quam.\n-Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.\n+What kind of dev uses lorem ipsum ?\n+Keyboard mashing is where it's at !\n+Check this out:\n+phq w4q3t'p£tfnjklifewqkpjnoq23bjikkijq4ovikobjqv4kioblj;\n+vqpewkvnkqknbpjlo[iqervkb[jplofqvwer[kpjlqfp[okqolp[f;vn]]]]]"
}
}
]
Loading