diff --git a/git/objects/commit.py b/git/objects/commit.py index 547e8fe82..138db0afe 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -26,6 +26,7 @@ import os from io import BytesIO import logging +from collections import defaultdict # typing ------------------------------------------------------------------ @@ -335,8 +336,72 @@ def stats(self) -> Stats: return Stats._list_from_string(self.repo, text) @property - def trailers(self) -> Dict: - """Get the trailers of the message as dictionary + def trailers(self) -> Dict[str, str]: + """Get the trailers of the message as a dictionary + + :note: This property is deprecated, please use either ``Commit.trailers_list`` or ``Commit.trailers_dict``. + + :return: + Dictionary containing whitespace stripped trailer information. + Only contains the latest instance of each trailer key. + """ + return { + k: v[0] for k, v in self.trailers_dict.items() + } + + @property + def trailers_list(self) -> List[Tuple[str, str]]: + """Get the trailers of the message as a list + + Git messages can contain trailer information that are similar to RFC 822 + e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). + + This functions calls ``git interpret-trailers --parse`` onto the message + to extract the trailer information, returns the raw trailer data as a list. + + Valid message with trailer:: + + Subject line + + some body information + + another information + + key1: value1.1 + key1: value1.2 + key2 : value 2 with inner spaces + + + Returned list will look like this:: + + [ + ("key1", "value1.1"), + ("key1", "value1.2"), + ("key2", "value 2 with inner spaces"), + ] + + + :return: + List containing key-value tuples of whitespace stripped trailer information. + """ + cmd = ["git", "interpret-trailers", "--parse"] + proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore + trailer: str = proc.communicate(str(self.message).encode())[0].decode("utf8") + trailer = trailer.strip() + + if not trailer: + return [] + + trailer_list = [] + for t in trailer.split("\n"): + key, val = t.split(":", 1) + trailer_list.append((key.strip(), val.strip())) + + return trailer_list + + @property + def trailers_dict(self) -> Dict[str, List[str]]: + """Get the trailers of the message as a dictionary Git messages can contain trailer information that are similar to RFC 822 e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). @@ -345,9 +410,7 @@ def trailers(self) -> Dict: to extract the trailer information. The key value pairs are stripped of leading and trailing whitespaces before they get saved into a dictionary. - Valid message with trailer: - - .. code-block:: + Valid message with trailer:: Subject line @@ -355,32 +418,27 @@ def trailers(self) -> Dict: another information - key1: value1 + key1: value1.1 + key1: value1.2 key2 : value 2 with inner spaces - dictionary will look like this: - .. code-block:: + Returned dictionary will look like this:: { - "key1": "value1", - "key2": "value 2 with inner spaces" + "key1": ["value1.1", "value1.2"], + "key2": ["value 2 with inner spaces"], } - :return: Dictionary containing whitespace stripped trailer information + :return: + Dictionary containing whitespace stripped trailer information. + Mapping trailer keys to a list of their corresponding values. """ - d = {} - cmd = ["git", "interpret-trailers", "--parse"] - proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore - trailer: str = proc.communicate(str(self.message).encode())[0].decode() - if trailer.endswith("\n"): - trailer = trailer[0:-1] - if trailer != "": - for line in trailer.split("\n"): - key, value = line.split(":", 1) - d[key.strip()] = value.strip() - return d + d = defaultdict(list) + for key, val in self.trailers_list: + d[key].append(val) + return dict(d) @classmethod def _iter_from_process_or_stream(cls, repo: "Repo", proc_or_stream: Union[Popen, IO]) -> Iterator["Commit"]: diff --git a/test/test_commit.py b/test/test_commit.py index 1efc68897..4871902ec 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -494,52 +494,57 @@ def test_datetimes(self): def test_trailers(self): KEY_1 = "Hello" - VALUE_1 = "World" + VALUE_1_1 = "World" + VALUE_1_2 = "Another-World" KEY_2 = "Key" VALUE_2 = "Value with inner spaces" - # Check if KEY 1 & 2 with Value 1 & 2 is extracted from multiple msg variations - msgs = [] - msgs.append(f"Subject\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") - msgs.append(f"Subject\n \nSome body of a function\n \n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n") - msgs.append( - f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2}: {VALUE_2}\n" - ) - msgs.append( - f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n{KEY_1}: {VALUE_1}\n{KEY_2} : {VALUE_2}\n" - ) - + # Check the following trailer example is extracted from multiple msg variations + TRAILER = f"{KEY_1}: {VALUE_1_1}\n{KEY_2}: {VALUE_2}\n{KEY_1}: {VALUE_1_2}" + msgs = [ + f"Subject\n\n{TRAILER}\n", + f"Subject\n \nSome body of a function\n \n{TRAILER}\n", + f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{TRAILER}\n", + ( + # check when trailer has inconsistent whitespace + f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n" + f"{KEY_1}:{VALUE_1_1}\n{KEY_2} : {VALUE_2}\n{KEY_1}: {VALUE_1_2}\n" + ), + ] for msg in msgs: - commit = self.rorepo.commit("master") - commit = copy.copy(commit) + commit = copy.copy(self.rorepo.commit("master")) commit.message = msg - assert KEY_1 in commit.trailers.keys() - assert KEY_2 in commit.trailers.keys() - assert commit.trailers[KEY_1] == VALUE_1 - assert commit.trailers[KEY_2] == VALUE_2 - - # Check that trailer stays empty for multiple msg combinations - msgs = [] - msgs.append(f"Subject\n") - msgs.append(f"Subject\n\nBody with some\nText\n") - msgs.append(f"Subject\n\nBody with\nText\n\nContinuation but\n doesn't contain colon\n") - msgs.append(f"Subject\n\nBody with\nText\n\nContinuation but\n only contains one :\n") - msgs.append(f"Subject\n\nBody with\nText\n\nKey: Value\nLine without colon\n") - msgs.append(f"Subject\n\nBody with\nText\n\nLine without colon\nKey: Value\n") + assert commit.trailers_list == [ + (KEY_1, VALUE_1_1), + (KEY_2, VALUE_2), + (KEY_1, VALUE_1_2), + ] + assert commit.trailers_dict == { + KEY_1: [VALUE_1_1, VALUE_1_2], + KEY_2: [VALUE_2], + } + + # check that trailer stays empty for multiple msg combinations + msgs = [ + f"Subject\n", + f"Subject\n\nBody with some\nText\n", + f"Subject\n\nBody with\nText\n\nContinuation but\n doesn't contain colon\n", + f"Subject\n\nBody with\nText\n\nContinuation but\n only contains one :\n", + f"Subject\n\nBody with\nText\n\nKey: Value\nLine without colon\n", + f"Subject\n\nBody with\nText\n\nLine without colon\nKey: Value\n", + ] for msg in msgs: - commit = self.rorepo.commit("master") - commit = copy.copy(commit) + commit = copy.copy(self.rorepo.commit("master")) commit.message = msg - assert len(commit.trailers.keys()) == 0 + assert commit.trailers_list == [] + assert commit.trailers_dict == {} # check that only the last key value paragraph is evaluated - commit = self.rorepo.commit("master") - commit = copy.copy(commit) - commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1}\n\n{KEY_2}: {VALUE_2}\n" - assert KEY_1 not in commit.trailers.keys() - assert KEY_2 in commit.trailers.keys() - assert commit.trailers[KEY_2] == VALUE_2 + commit = copy.copy(self.rorepo.commit("master")) + commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1_1}\n\n{KEY_2}: {VALUE_2}\n" + assert commit.trailers_list == [(KEY_2, VALUE_2)] + assert commit.trailers_dict == {KEY_2: [VALUE_2]} def test_commit_co_authors(self): commit = copy.copy(self.rorepo.commit("4251bd5"))