From d2f7284189b5c1f867ebd58b0b879fa34406eefc Mon Sep 17 00:00:00 2001 From: Twist Date: Fri, 21 Apr 2023 22:16:32 +0100 Subject: [PATCH 1/5] Add trailers_list and trailers_list methods to fix the commit trailers functionality. Update trailers tests. --- git/objects/commit.py | 101 +++++++++++++++++++++++++++++++++--------- test/test_commit.py | 83 +++++++++++++++++++--------------- 2 files changed, 126 insertions(+), 58 deletions(-) diff --git a/git/objects/commit.py b/git/objects/commit.py index 547e8fe82..50da0a105 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -26,6 +26,7 @@ import os from io import BytesIO import logging +from collections import defaultdict # typing ------------------------------------------------------------------ @@ -335,8 +336,70 @@ def stats(self) -> Stats: return Stats._list_from_string(self.repo, text) @property - def trailers(self) -> Dict: - """Get the trailers of the message as dictionary + def trailers(self) -> Dict[str, str]: + """Get the trailers of the message as a dictionary + + Git messages can contain trailer information that are similar to RFC 822 + e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). + + WARNING: This function only returns the latest instance of each trailer key + and will be deprecated soon. Please see either ``Commit.trailers_list`` or ``Commit.trailers_dict``. + + :return: + Dictionary containing whitespace stripped trailer information. + Only the latest instance of each trailer key. + """ + return { + k: v[0] for k, v in self.trailers_dict.items() + } + + @property + def trailers_list(self) -> List[str]: + """Get the trailers of the message as a list + + Git messages can contain trailer information that are similar to RFC 822 + e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). + + This functions calls ``git interpret-trailers --parse`` onto the message + to extract the trailer information, returns the raw trailer data as a list. + + Valid message with trailer:: + + Subject line + + some body information + + another information + + key1: value1.1 + key1: value1.2 + key2 : value 2 with inner spaces + + + Returned list will look like this:: + + [ + "key1: value1.1", + "key1: value1.2", + "key2 : value 2 with inner spaces", + ] + + + :return: + List containing whitespace stripped trailer information. + """ + cmd = ["git", "interpret-trailers", "--parse"] + proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore + trailer: str = proc.communicate(str(self.message).encode())[0].decode() + trailer = trailer.strip() + if trailer: + return [t.strip() for t in trailer.split("\n")] + + return [] + + @property + def trailers_dict(self) -> Dict[str, List[str]]: + """Get the trailers of the message as a dictionary Git messages can contain trailer information that are similar to RFC 822 e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). @@ -345,9 +408,7 @@ def trailers(self) -> Dict: to extract the trailer information. The key value pairs are stripped of leading and trailing whitespaces before they get saved into a dictionary. - Valid message with trailer: - - .. code-block:: + Valid message with trailer:: Subject line @@ -355,32 +416,28 @@ def trailers(self) -> Dict: another information - key1: value1 + key1: value1.1 + key1: value1.2 key2 : value 2 with inner spaces - dictionary will look like this: - .. code-block:: + Returned dictionary will look like this:: { - "key1": "value1", - "key2": "value 2 with inner spaces" + "key1": ["value1.1", "value1.2"], + "key2": ["value 2 with inner spaces"], } - :return: Dictionary containing whitespace stripped trailer information + :return: + Dictionary containing whitespace stripped trailer information. + Mapping trailer keys to a list of their corresponding values. """ - d = {} - cmd = ["git", "interpret-trailers", "--parse"] - proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore - trailer: str = proc.communicate(str(self.message).encode())[0].decode() - if trailer.endswith("\n"): - trailer = trailer[0:-1] - if trailer != "": - for line in trailer.split("\n"): - key, value = line.split(":", 1) - d[key.strip()] = value.strip() - return d + d = defaultdict(list) + for trailer in self.trailers_list: + key, value = trailer.split(":", 1) + d[key.strip()].append(value.strip()) + return dict(d) @classmethod def _iter_from_process_or_stream(cls, repo: "Repo", proc_or_stream: Union[Popen, IO]) -> Iterator["Commit"]: diff --git a/test/test_commit.py b/test/test_commit.py index 1efc68897..ca1e4752b 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -494,52 +494,63 @@ def test_datetimes(self): def test_trailers(self): KEY_1 = "Hello" - VALUE_1 = "World" + VALUE_1_1 = "World" + VALUE_1_2 = "Another-World" KEY_2 = "Key" VALUE_2 = "Value with inner spaces" - # Check if KEY 1 & 2 with Value 1 & 2 is extracted from multiple msg variations - msgs = [] - msgs.append(f"Subject\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") - msgs.append(f"Subject\n \nSome body of a function\n \n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") - msgs.append( - f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n" - ) - msgs.append( - f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2} : {VALUE_2}\n" - ) - + # Check the following trailer example is extracted from multiple msg variations + TRAILER = f"{KEY_1}: {VALUE_1_1}\n{KEY_2}: {VALUE_2}\n{KEY_1}: {VALUE_1_2}" + msgs = [ + f"Subject\n\n{TRAILER}\n", + f"Subject\n \nSome body of a function\n \n{TRAILER}\n", + f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{TRAILER}\n", + ( + # check when trailer has inconsistent whitespace + f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n" + f"{KEY_1}:{VALUE_1_1}\n{KEY_2} : {VALUE_2}\n{KEY_1}: {VALUE_1_2}\n" + ), + ] for msg in msgs: - commit = self.rorepo.commit("master") - commit = copy.copy(commit) + commit = copy.copy(self.rorepo.commit("master")) commit.message = msg - assert KEY_1 in commit.trailers.keys() - assert KEY_2 in commit.trailers.keys() - assert commit.trailers[KEY_1] == VALUE_1 - assert commit.trailers[KEY_2] == VALUE_2 - - # Check that trailer stays empty for multiple msg combinations - msgs = [] - msgs.append(f"Subject\n") - msgs.append(f"Subject\n\nBody with some\nText\n") - msgs.append(f"Subject\n\nBody with\nText\n\nContinuation but\n doesn't contain colon\n") - msgs.append(f"Subject\n\nBody with\nText\n\nContinuation but\n only contains one :\n") - msgs.append(f"Subject\n\nBody with\nText\n\nKey: Value\nLine without colon\n") - msgs.append(f"Subject\n\nBody with\nText\n\nLine without colon\nKey: Value\n") + assert commit.trailers_list == [ + f"{KEY_1}: {VALUE_1_1}", + f"{KEY_2}: {VALUE_2}", + f"{KEY_1}: {VALUE_1_2}", + ] + assert commit.trailers_dict == { + KEY_1: [VALUE_1_1, VALUE_1_2], + KEY_2: [VALUE_2], + } + assert commit.trailers == { + KEY_1: VALUE_1_1, + KEY_2: VALUE_2, + } + + # check that trailer stays empty for multiple msg combinations + msgs = [ + f"Subject\n", + f"Subject\n\nBody with some\nText\n", + f"Subject\n\nBody with\nText\n\nContinuation but\n doesn't contain colon\n", + f"Subject\n\nBody with\nText\n\nContinuation but\n only contains one :\n", + f"Subject\n\nBody with\nText\n\nKey: Value\nLine without colon\n", + f"Subject\n\nBody with\nText\n\nLine without colon\nKey: Value\n", + ] for msg in msgs: - commit = self.rorepo.commit("master") - commit = copy.copy(commit) + commit = copy.copy(self.rorepo.commit("master")) commit.message = msg - assert len(commit.trailers.keys()) == 0 + assert commit.trailers_list == [] + assert commit.trailers_dict == {} + assert commit.trailers == {} # check that only the last key value paragraph is evaluated - commit = self.rorepo.commit("master") - commit = copy.copy(commit) - commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1}\n\n{KEY_2}: {VALUE_2}\n" - assert KEY_1 not in commit.trailers.keys() - assert KEY_2 in commit.trailers.keys() - assert commit.trailers[KEY_2] == VALUE_2 + commit = copy.copy(self.rorepo.commit("master")) + commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1_1}\n\n{KEY_2}: {VALUE_2}\n" + assert commit.trailers_list == [f"{KEY_2}: {VALUE_2}"] + assert commit.trailers_dict == {KEY_2: [VALUE_2]} + assert commit.trailers == {KEY_2: VALUE_2} def test_commit_co_authors(self): commit = copy.copy(self.rorepo.commit("4251bd5")) From 78424b56654ad476da4bd2faf88d3875c5265e0d Mon Sep 17 00:00:00 2001 From: Twist Date: Sat, 22 Apr 2023 17:19:20 +0100 Subject: [PATCH 2/5] Deprecate Commit.trailers. --- git/objects/commit.py | 18 ------------------ test/test_commit.py | 6 ------ 2 files changed, 24 deletions(-) diff --git a/git/objects/commit.py b/git/objects/commit.py index 50da0a105..1e3f751bc 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -335,24 +335,6 @@ def stats(self) -> Stats: text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True) return Stats._list_from_string(self.repo, text) - @property - def trailers(self) -> Dict[str, str]: - """Get the trailers of the message as a dictionary - - Git messages can contain trailer information that are similar to RFC 822 - e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). - - WARNING: This function only returns the latest instance of each trailer key - and will be deprecated soon. Please see either ``Commit.trailers_list`` or ``Commit.trailers_dict``. - - :return: - Dictionary containing whitespace stripped trailer information. - Only the latest instance of each trailer key. - """ - return { - k: v[0] for k, v in self.trailers_dict.items() - } - @property def trailers_list(self) -> List[str]: """Get the trailers of the message as a list diff --git a/test/test_commit.py b/test/test_commit.py index ca1e4752b..8d2ee754b 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -523,10 +523,6 @@ def test_trailers(self): KEY_1: [VALUE_1_1, VALUE_1_2], KEY_2: [VALUE_2], } - assert commit.trailers == { - KEY_1: VALUE_1_1, - KEY_2: VALUE_2, - } # check that trailer stays empty for multiple msg combinations msgs = [ @@ -543,14 +539,12 @@ def test_trailers(self): commit.message = msg assert commit.trailers_list == [] assert commit.trailers_dict == {} - assert commit.trailers == {} # check that only the last key value paragraph is evaluated commit = copy.copy(self.rorepo.commit("master")) commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1_1}\n\n{KEY_2}: {VALUE_2}\n" assert commit.trailers_list == [f"{KEY_2}: {VALUE_2}"] assert commit.trailers_dict == {KEY_2: [VALUE_2]} - assert commit.trailers == {KEY_2: VALUE_2} def test_commit_co_authors(self): commit = copy.copy(self.rorepo.commit("4251bd5")) From abde3eafd293e8fa2ef2dc22d58ba5d80f1702e9 Mon Sep 17 00:00:00 2001 From: Twist Date: Sat, 22 Apr 2023 17:47:08 +0100 Subject: [PATCH 3/5] Update Commit.trailer_list to return tuples. --- git/objects/commit.py | 27 ++++++++++++++++----------- test/test_commit.py | 8 ++++---- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/git/objects/commit.py b/git/objects/commit.py index 1e3f751bc..b41a79951 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -336,7 +336,7 @@ def stats(self) -> Stats: return Stats._list_from_string(self.repo, text) @property - def trailers_list(self) -> List[str]: + def trailers_list(self) -> List[Tuple[str, str]]: """Get the trailers of the message as a list Git messages can contain trailer information that are similar to RFC 822 @@ -361,23 +361,29 @@ def trailers_list(self) -> List[str]: Returned list will look like this:: [ - "key1: value1.1", - "key1: value1.2", - "key2 : value 2 with inner spaces", + ("key1", "value1.1"), + ("key1", "value1.2"), + ("key2", "value 2 with inner spaces"), ] :return: - List containing whitespace stripped trailer information. + List containing key-value tuples of whitespace stripped trailer information. """ cmd = ["git", "interpret-trailers", "--parse"] proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore trailer: str = proc.communicate(str(self.message).encode())[0].decode() trailer = trailer.strip() - if trailer: - return [t.strip() for t in trailer.split("\n")] - return [] + if not trailer: + return [] + + trailer_list = [] + for t in trailer.split("\n"): + key, val = t.split(":", 1) + trailer_list.append((key.strip(), val.strip())) + + return trailer_list @property def trailers_dict(self) -> Dict[str, List[str]]: @@ -416,9 +422,8 @@ def trailers_dict(self) -> Dict[str, List[str]]: Mapping trailer keys to a list of their corresponding values. """ d = defaultdict(list) - for trailer in self.trailers_list: - key, value = trailer.split(":", 1) - d[key.strip()].append(value.strip()) + for key, val in self.trailers_list: + d[key].append(val) return dict(d) @classmethod diff --git a/test/test_commit.py b/test/test_commit.py index 8d2ee754b..4871902ec 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -515,9 +515,9 @@ def test_trailers(self): commit = copy.copy(self.rorepo.commit("master")) commit.message = msg assert commit.trailers_list == [ - f"{KEY_1}: {VALUE_1_1}", - f"{KEY_2}: {VALUE_2}", - f"{KEY_1}: {VALUE_1_2}", + (KEY_1, VALUE_1_1), + (KEY_2, VALUE_2), + (KEY_1, VALUE_1_2), ] assert commit.trailers_dict == { KEY_1: [VALUE_1_1, VALUE_1_2], @@ -543,7 +543,7 @@ def test_trailers(self): # check that only the last key value paragraph is evaluated commit = copy.copy(self.rorepo.commit("master")) commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1_1}\n\n{KEY_2}: {VALUE_2}\n" - assert commit.trailers_list == [f"{KEY_2}: {VALUE_2}"] + assert commit.trailers_list == [(KEY_2, VALUE_2)] assert commit.trailers_dict == {KEY_2: [VALUE_2]} def test_commit_co_authors(self): From ed36bd903e1fdf45c53b52dbcb1b2d8444965d98 Mon Sep 17 00:00:00 2001 From: Twist Date: Sat, 22 Apr 2023 17:54:04 +0100 Subject: [PATCH 4/5] Specify encoding in Commit.trailer_list. --- git/objects/commit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git/objects/commit.py b/git/objects/commit.py index b41a79951..f164becbc 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -372,7 +372,7 @@ def trailers_list(self) -> List[Tuple[str, str]]: """ cmd = ["git", "interpret-trailers", "--parse"] proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore - trailer: str = proc.communicate(str(self.message).encode())[0].decode() + trailer: str = proc.communicate(str(self.message).encode())[0].decode("utf8") trailer = trailer.strip() if not trailer: From 9ef07a731caf39684891bce5cc92c4e91829138d Mon Sep 17 00:00:00 2001 From: Twist Date: Sun, 23 Apr 2023 14:16:53 +0100 Subject: [PATCH 5/5] Revert the removal of Commit.trailers property. --- git/objects/commit.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/git/objects/commit.py b/git/objects/commit.py index f164becbc..138db0afe 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -335,6 +335,20 @@ def stats(self) -> Stats: text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True) return Stats._list_from_string(self.repo, text) + @property + def trailers(self) -> Dict[str, str]: + """Get the trailers of the message as a dictionary + + :note: This property is deprecated, please use either ``Commit.trailers_list`` or ``Commit.trailers_dict``. + + :return: + Dictionary containing whitespace stripped trailer information. + Only contains the latest instance of each trailer key. + """ + return { + k: v[0] for k, v in self.trailers_dict.items() + } + @property def trailers_list(self) -> List[Tuple[str, str]]: """Get the trailers of the message as a list