From 487a4fdb38efc82d30111e49f941a25cba4aa7a7 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 20 Dec 2023 22:39:11 -0500 Subject: [PATCH 1/4] Add a test for git.index.util.TemporaryFileSwap This is a general test for TemporaryFileSwap, but by being parametrized by the type of file_path, it reveals a regression introduced in 9e86053 (#1770). TemporaryFileSwap still works when file_path is a string, but is now broken when it is a Path. That worked before, and the type annotations document that it should be able to work. This is at least a bug because TemporaryFileSwap is public. (I am unsure whether, in practice, GitPython itself uses it in a way that sometimes passes a Path object as file_path. But code that uses GitPython may call it directly and pass Path.) --- test/test_index.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/test/test_index.py b/test/test_index.py index 2f97f0af8..c3f3b4fae 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -34,10 +34,11 @@ ) from git.index.fun import hook_path from git.index.typ import BaseIndexEntry, IndexEntry +from git.index.util import TemporaryFileSwap from git.objects import Blob -from test.lib import TestBase, fixture, fixture_path, with_rw_directory, with_rw_repo from git.util import Actor, hex_to_bin, rmtree from gitdb.base import IStream +from test.lib import TestBase, fixture, fixture_path, with_rw_directory, with_rw_repo HOOKS_SHEBANG = "#!/usr/bin/env sh\n" @@ -1087,3 +1088,25 @@ def test_index_add_pathlike(self, rw_repo): file.touch() rw_repo.index.add(file) + + +class TestIndexUtils: + @pytest.mark.parametrize("file_path_type", [str, Path]) + def test_temporary_file_swap(self, tmp_path, file_path_type): + file_path = tmp_path / "foo" + file_path.write_bytes(b"some data") + + with TemporaryFileSwap(file_path_type(file_path)) as ctx: + assert Path(ctx.file_path) == file_path + assert not file_path.exists() + + # Recreate it with new data, so we can observe that they're really separate. + file_path.write_bytes(b"other data") + + temp_file_path = Path(ctx.tmp_file_path) + assert temp_file_path.parent == file_path.parent + assert temp_file_path.name.startswith(file_path.name) + assert temp_file_path.read_bytes() == b"some data" + + assert not temp_file_path.exists() + assert file_path.read_bytes() == b"some data" # Not b"other data". From 1ddf953ea0d7625485ed02c90b03aae7f939ec46 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 20 Dec 2023 22:58:17 -0500 Subject: [PATCH 2/4] Fix TemporaryFileSwap bug when file_path is a Path This fixes the regression introduced in 9e86053 (#1770) where the file_path argument to TemporaryFileSwap.__init__ could no longer be a Path object. The change also makes this truer to the code from before #1770, still without the race condition fixed there, in that str was called on file_path then as well. However, it is not clear that this is a good thing, because this is not an idiomatic use of mkstemp. The reason the `prefix` cannot be a Path is that it is expected to be a filename prefix, with leading directories given in the `dir` argument. --- git/index/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git/index/util.py b/git/index/util.py index 1c3b1c4ad..03de80f71 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -41,7 +41,7 @@ class TemporaryFileSwap: def __init__(self, file_path: PathLike) -> None: self.file_path = file_path - fd, self.tmp_file_path = tempfile.mkstemp(prefix=self.file_path, dir="") + fd, self.tmp_file_path = tempfile.mkstemp(prefix=str(self.file_path), dir="") os.close(fd) with contextlib.suppress(OSError): # It may be that the source does not exist. os.replace(self.file_path, self.tmp_file_path) From b438c459d91243b685f540ccb1c124260e9d28b9 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 20 Dec 2023 23:29:14 -0500 Subject: [PATCH 3/4] Refactor TemporaryFileSwap.__init__ for clarity This changes the mkstemp call TemporaryFileSwap uses to atomically create (and thus reserve) the temporary file, so that it passes only a filename (i.e., basename) prefix as `prefix`, and passes all other path components (i.e., the directory) as `dir`. (If the original path has no directory, then `dir` is "" as before.) This makes the mkstemp call *slightly* more idiomatic. This also makes it clearer, as it is no longer using `prefix` for something that feels like it should be possible to pass as a Path object. Although mkstemp does not accept a Path as `prefix`, it does (as expected) accept one as `dir`. However, to keep the code simple, I'm passing str for both. The os.path.split function accepts both str and Path (since Python 3.6), and returns str objects, which are now used for the `dir` and `prefix` arguments to mkstemp. For unusual cases, this may technically not be a refactoring. For example, a file_path of "a/b//c" will be split into "a/b" and "c". If the automatically generated temporary file suffix is "xyz", then that results in a tmp_file_path of "a/b/cxyz" where "a/b//cxyz" would have been used before. The tmp_file_path attribute of a TemporaryFileSwap object is public (and used in filesystem calls). However, no guarantee has ever been given that the temporary file path have the original path as an exact string prefix. I believe the slightly weaker relationship I expressed in the recently introduced test_temporary_file_swap -- another file in the same directory, named with the original filename with more characters, consisting of equivalent path components in the same order -- has always been the intended one. Note that this slight possible variation does not apply to the file_path attribute. That attribute is always kept exactly as it was, both in its type and its value, and it always used unmodified in calls that access the filesystem. --- git/index/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/git/index/util.py b/git/index/util.py index 03de80f71..61039fe7c 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -41,7 +41,8 @@ class TemporaryFileSwap: def __init__(self, file_path: PathLike) -> None: self.file_path = file_path - fd, self.tmp_file_path = tempfile.mkstemp(prefix=str(self.file_path), dir="") + dirname, basename = osp.split(file_path) + fd, self.tmp_file_path = tempfile.mkstemp(prefix=basename, dir=dirname) os.close(fd) with contextlib.suppress(OSError): # It may be that the source does not exist. os.replace(self.file_path, self.tmp_file_path) From 4e91a6c7b521dc7d0331dbb5455e9c424d26d655 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 20 Dec 2023 23:56:55 -0500 Subject: [PATCH 4/4] Tweak formatting for `@pytest.mark.parametrize` This removes the "fmt: off" / "fmt: on" directives around the `@pytest.mark.parametrize` decoration on test_blob_filter, and reformats it with black, for consistency with other such decorations. The style used there, *if* it could be specified as a rule and thus used without "fmt:" directives, may be nicer than how black formats multi-line mark decorations. However, since that decoration was written, there have been a number of other such decorations, which have been in black style. This also removes the only (or only remaining?) "fmt:" directive in the codebase. As such, it should possibly have been done in #1760. --- test/test_blob_filter.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test/test_blob_filter.py b/test/test_blob_filter.py index a91f211bf..ddd83079a 100644 --- a/test/test_blob_filter.py +++ b/test/test_blob_filter.py @@ -14,14 +14,15 @@ from git.types import PathLike -# fmt: off -@pytest.mark.parametrize('paths, path, expected_result', [ - ((Path("foo"),), Path("foo"), True), - ((Path("foo"),), Path("foo/bar"), True), - ((Path("foo/bar"),), Path("foo"), False), - ((Path("foo"), Path("bar")), Path("foo"), True), -]) -# fmt: on +@pytest.mark.parametrize( + "paths, path, expected_result", + [ + ((Path("foo"),), Path("foo"), True), + ((Path("foo"),), Path("foo/bar"), True), + ((Path("foo/bar"),), Path("foo"), False), + ((Path("foo"), Path("bar")), Path("foo"), True), + ], +) def test_blob_filter(paths: Sequence[PathLike], path: PathLike, expected_result: bool) -> None: """Test the blob filter.""" blob_filter = BlobFilter(paths)