Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix up the commit trailers functionality #1576

Merged
merged 5 commits into from
Apr 23, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 79 additions & 22 deletions git/objects/commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import os
from io import BytesIO
import logging
from collections import defaultdict


# typing ------------------------------------------------------------------
Expand Down Expand Up @@ -335,8 +336,70 @@ def stats(self) -> Stats:
return Stats._list_from_string(self.repo, text)

@property
def trailers(self) -> Dict:
"""Get the trailers of the message as dictionary
def trailers(self) -> Dict[str, str]:
"""Get the trailers of the message as a dictionary

Git messages can contain trailer information that is similar to RFC 822
e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers).

WARNING: This function only returns the first instance of each trailer key
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's OK to deprecate it, and suggest using trailers_dict(). Then this warning can be removed as it will show up when using it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed - deprecated 78424b5

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's not deprecation 😅, that's downright removal, which is a breaking change.

After taking a look I realize now that there is no built-in facility for marking properties or functions as deprecated. So I think documenting the deprecation is as good as it gets.

Sorry for the hassle, but once that is reverted, it's ready to merge.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries! trailers method is back - 9ef07a7

and will be deprecated soon. Please see either ``Commit.trailers_list`` or ``Commit.trailers_dict``.

:return:
Dictionary containing whitespace-stripped trailer information.
Only the first instance of each trailer key is included.
"""
return {
k: v[0] for k, v in self.trailers_dict.items()
}

@property
def trailers_list(self) -> List[str]:
"""Get the trailers of the message as a list

Git messages can contain trailer information that is similar to RFC 822
e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers).

This function calls ``git interpret-trailers --parse`` on the message
to extract the trailer information and returns the raw trailer data as a list.

Valid message with trailer::

Subject line

some body information

another information

key1: value1.1
key1: value1.2
key2 : value 2 with inner spaces


Returned list will look like this::

[
"key1: value1.1",
"key1: value1.2",
"key2 : value 2 with inner spaces",
]


:return:
List containing whitespace-stripped trailer information.
"""
cmd = ["git", "interpret-trailers", "--parse"]
proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore
trailer: str = proc.communicate(str(self.message).encode())[0].decode()
trailer = trailer.strip()
if trailer:
return [t.strip() for t in trailer.split("\n")]

return []

@property
def trailers_dict(self) -> Dict[str, List[str]]:
"""Get the trailers of the message as a dictionary

Git messages can contain trailer information that is similar to RFC 822
e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers).
Expand All @@ -345,42 +408,36 @@ def trailers(self) -> Dict:
to extract the trailer information. The key-value pairs are stripped of
leading and trailing whitespace before they get saved into a dictionary.

Valid message with trailer:

.. code-block::
Valid message with trailer::

Subject line

some body information

another information

key1: value1
key1: value1.1
key1: value1.2
key2 : value 2 with inner spaces

dictionary will look like this:

.. code-block::
Returned dictionary will look like this::

{
"key1": "value1",
"key2": "value 2 with inner spaces"
"key1": ["value1.1", "value1.2"],
"key2": ["value 2 with inner spaces"],
}

:return: Dictionary containing whitespace stripped trailer information

:return:
Dictionary containing whitespace-stripped trailer information,
mapping each trailer key to a list of its corresponding values.
"""
d = {}
cmd = ["git", "interpret-trailers", "--parse"]
proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore
trailer: str = proc.communicate(str(self.message).encode())[0].decode()
if trailer.endswith("\n"):
trailer = trailer[0:-1]
if trailer != "":
for line in trailer.split("\n"):
key, value = line.split(":", 1)
d[key.strip()] = value.strip()
return d
d = defaultdict(list)
for trailer in self.trailers_list:
key, value = trailer.split(":", 1)
d[key.strip()].append(value.strip())
return dict(d)

@classmethod
def _iter_from_process_or_stream(cls, repo: "Repo", proc_or_stream: Union[Popen, IO]) -> Iterator["Commit"]:
Expand Down
83 changes: 47 additions & 36 deletions test/test_commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,52 +494,63 @@ def test_datetimes(self):

def test_trailers(self):
KEY_1 = "Hello"
VALUE_1 = "World"
VALUE_1_1 = "World"
VALUE_1_2 = "Another-World"
KEY_2 = "Key"
VALUE_2 = "Value with inner spaces"

# Check if KEY 1 & 2 with Value 1 & 2 is extracted from multiple msg variations
msgs = []
msgs.append(f"Subject\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n")
msgs.append(f"Subject\n \nSome body of a function\n \n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n")
msgs.append(
f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n"
)
msgs.append(
f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2} : {VALUE_2}\n"
)

# Check that the following trailer example is extracted from multiple msg variations
TRAILER = f"{KEY_1}: {VALUE_1_1}\n{KEY_2}: {VALUE_2}\n{KEY_1}: {VALUE_1_2}"
msgs = [
f"Subject\n\n{TRAILER}\n",
f"Subject\n \nSome body of a function\n \n{TRAILER}\n",
f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{TRAILER}\n",
(
# check when trailer has inconsistent whitespace
f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n"
f"{KEY_1}:{VALUE_1_1}\n{KEY_2} : {VALUE_2}\n{KEY_1}: {VALUE_1_2}\n"
),
]
for msg in msgs:
commit = self.rorepo.commit("master")
commit = copy.copy(commit)
commit = copy.copy(self.rorepo.commit("master"))
commit.message = msg
assert KEY_1 in commit.trailers.keys()
assert KEY_2 in commit.trailers.keys()
assert commit.trailers[KEY_1] == VALUE_1
assert commit.trailers[KEY_2] == VALUE_2

# Check that trailer stays empty for multiple msg combinations
msgs = []
msgs.append(f"Subject\n")
msgs.append(f"Subject\n\nBody with some\nText\n")
msgs.append(f"Subject\n\nBody with\nText\n\nContinuation but\n doesn't contain colon\n")
msgs.append(f"Subject\n\nBody with\nText\n\nContinuation but\n only contains one :\n")
msgs.append(f"Subject\n\nBody with\nText\n\nKey: Value\nLine without colon\n")
msgs.append(f"Subject\n\nBody with\nText\n\nLine without colon\nKey: Value\n")
assert commit.trailers_list == [
f"{KEY_1}: {VALUE_1_1}",
f"{KEY_2}: {VALUE_2}",
f"{KEY_1}: {VALUE_1_2}",
]
assert commit.trailers_dict == {
KEY_1: [VALUE_1_1, VALUE_1_2],
KEY_2: [VALUE_2],
}
assert commit.trailers == {
KEY_1: VALUE_1_1,
KEY_2: VALUE_2,
}

# check that trailer stays empty for multiple msg combinations
msgs = [
f"Subject\n",
f"Subject\n\nBody with some\nText\n",
f"Subject\n\nBody with\nText\n\nContinuation but\n doesn't contain colon\n",
f"Subject\n\nBody with\nText\n\nContinuation but\n only contains one :\n",
f"Subject\n\nBody with\nText\n\nKey: Value\nLine without colon\n",
f"Subject\n\nBody with\nText\n\nLine without colon\nKey: Value\n",
]

for msg in msgs:
commit = self.rorepo.commit("master")
commit = copy.copy(commit)
commit = copy.copy(self.rorepo.commit("master"))
commit.message = msg
assert len(commit.trailers.keys()) == 0
assert commit.trailers_list == []
assert commit.trailers_dict == {}
assert commit.trailers == {}

# check that only the last key value paragraph is evaluated
commit = self.rorepo.commit("master")
commit = copy.copy(commit)
commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1}\n\n{KEY_2}: {VALUE_2}\n"
assert KEY_1 not in commit.trailers.keys()
assert KEY_2 in commit.trailers.keys()
assert commit.trailers[KEY_2] == VALUE_2
commit = copy.copy(self.rorepo.commit("master"))
commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1_1}\n\n{KEY_2}: {VALUE_2}\n"
assert commit.trailers_list == [f"{KEY_2}: {VALUE_2}"]
assert commit.trailers_dict == {KEY_2: [VALUE_2]}
assert commit.trailers == {KEY_2: VALUE_2}

def test_commit_co_authors(self):
commit = copy.copy(self.rorepo.commit("4251bd5"))
Expand Down