From e3597180c74d4d1aae444ca42c1e1afdaec3f19a Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 29 Nov 2023 00:14:54 -0500 Subject: [PATCH 1/6] Revert "Add xfail marks for IndexFile.from_tree failures" This removes the xfail marks from 8 tests that fail due to #1630, to be fixed in the subsequent commits. This reverts commit 6e477e3a06e4b11d43ea118a9f18cae03fa211fd. --- test/test_docs.py | 11 ++--------- test/test_fun.py | 17 +++-------------- test/test_index.py | 40 ---------------------------------------- test/test_refs.py | 11 ----------- 4 files changed, 5 insertions(+), 74 deletions(-) diff --git a/test/test_docs.py b/test/test_docs.py index 2ff1c794a..2f4b2e8d8 100644 --- a/test/test_docs.py +++ b/test/test_docs.py @@ -8,10 +8,11 @@ import pytest -from git.exc import GitCommandError from test.lib import TestBase from test.lib.helper import with_rw_directory +import os.path + class Tutorials(TestBase): def tearDown(self): @@ -206,14 +207,6 @@ def update(self, op_code, cur_count, max_count=None, message=""): assert sm.module_exists() # The submodule's working tree was checked out by update. # ![14-test_init_repo_object] - @pytest.mark.xfail( - os.name == "nt", - reason=( - "IndexFile.from_tree is broken on Windows (related to NamedTemporaryFile), see #1630.\n" - "'git read-tree --index-output=...' fails with 'fatal: unable to write new index file'." - ), - raises=GitCommandError, - ) @with_rw_directory def test_references_and_objects(self, rw_dir): # [1-test_references_and_objects] diff --git a/test/test_fun.py b/test/test_fun.py index 8ea5b7e46..566bc9aae 100644 --- a/test/test_fun.py +++ b/test/test_fun.py @@ -3,13 +3,10 @@ from io import BytesIO from stat import S_IFDIR, S_IFREG, S_IFLNK, S_IXUSR -import os +from os import stat import os.path as osp -import pytest - from git import Git -from git.exc import GitCommandError from git.index import IndexFile from git.index.fun import ( aggressive_tree_merge, @@ -37,14 +34,6 @@ def _assert_index_entries(self, entries, trees): assert (entry.path, entry.stage) in index.entries # END assert entry matches fully - @pytest.mark.xfail( - os.name == "nt", - reason=( - "IndexFile.from_tree is broken on Windows (related to NamedTemporaryFile), see #1630.\n" - "'git read-tree --index-output=...' fails with 'fatal: unable to write new index file'." - ), - raises=GitCommandError, - ) def test_aggressive_tree_merge(self): # Head tree with additions, removals and modification compared to its predecessor. odb = self.rorepo.odb @@ -302,12 +291,12 @@ def test_linked_worktree_traversal(self, rw_dir): rw_master.git.worktree("add", worktree_path, branch.name) dotgit = osp.join(worktree_path, ".git") - statbuf = os.stat(dotgit) + statbuf = stat(dotgit) self.assertTrue(statbuf.st_mode & S_IFREG) gitdir = find_worktree_git_dir(dotgit) self.assertIsNotNone(gitdir) - statbuf = os.stat(gitdir) + statbuf = stat(gitdir) self.assertTrue(statbuf.st_mode & S_IFDIR) def test_tree_entries_from_data_with_failing_name_decode_py3(self): diff --git a/test/test_index.py b/test/test_index.py index cd1c37efc..5bf34757a 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -284,14 +284,6 @@ def add_bad_blob(): except Exception as ex: assert "index.lock' could not be obtained" not in str(ex) - @pytest.mark.xfail( - os.name == "nt", - reason=( - "IndexFile.from_tree is broken on Windows (related to NamedTemporaryFile), see #1630.\n" - "'git read-tree --index-output=...' fails with 'fatal: unable to write new index file'." - ), - raises=GitCommandError, - ) @with_rw_repo("0.1.6") def test_index_file_from_tree(self, rw_repo): common_ancestor_sha = "5117c9c8a4d3af19a9958677e45cda9269de1541" @@ -342,14 +334,6 @@ def test_index_file_from_tree(self, rw_repo): # END for each blob self.assertEqual(num_blobs, len(three_way_index.entries)) - @pytest.mark.xfail( - os.name == "nt", - reason=( - "IndexFile.from_tree is broken on Windows (related to NamedTemporaryFile), see #1630.\n" - "'git read-tree --index-output=...' fails with 'fatal: unable to write new index file'." - ), - raises=GitCommandError, - ) @with_rw_repo("0.1.6") def test_index_merge_tree(self, rw_repo): # A bit out of place, but we need a different repo for this: @@ -412,14 +396,6 @@ def test_index_merge_tree(self, rw_repo): self.assertEqual(len(unmerged_blobs), 1) self.assertEqual(list(unmerged_blobs.keys())[0], manifest_key[0]) - @pytest.mark.xfail( - os.name == "nt", - reason=( - "IndexFile.from_tree is broken on Windows (related to NamedTemporaryFile), see #1630.\n" - "'git read-tree --index-output=...' fails with 'fatal: unable to write new index file'." - ), - raises=GitCommandError, - ) @with_rw_repo("0.1.6") def test_index_file_diffing(self, rw_repo): # Default Index instance points to our index. @@ -554,14 +530,6 @@ def _count_existing(self, repo, files): # END num existing helper - @pytest.mark.xfail( - os.name == "nt", - reason=( - "IndexFile.from_tree is broken on Windows (related to NamedTemporaryFile), see #1630.\n" - "'git read-tree --index-output=...' fails with 'fatal: unable to write new index file'." - ), - raises=GitCommandError, - ) @with_rw_repo("0.1.6") def test_index_mutation(self, rw_repo): index = rw_repo.index @@ -915,14 +883,6 @@ def make_paths(): for absfile in absfiles: assert osp.isfile(absfile) - @pytest.mark.xfail( - os.name == "nt", - reason=( - "IndexFile.from_tree is broken on Windows (related to NamedTemporaryFile), see #1630.\n" - "'git read-tree --index-output=...' fails with 'fatal: unable to write new index file'." - ), - raises=GitCommandError, - ) @with_rw_repo("HEAD") def test_compare_write_tree(self, rw_repo): """Test writing all trees, comparing them for equality.""" diff --git a/test/test_refs.py b/test/test_refs.py index a1573c11b..6ee385007 100644 --- a/test/test_refs.py +++ b/test/test_refs.py @@ -4,11 +4,8 @@ # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ from itertools import chain -import os from pathlib import Path -import pytest - from git import ( Reference, Head, @@ -218,14 +215,6 @@ def test_head_checkout_detached_head(self, rw_repo): assert isinstance(res, SymbolicReference) assert res.name == "HEAD" - @pytest.mark.xfail( - os.name == "nt", - reason=( - "IndexFile.from_tree is broken on Windows (related to NamedTemporaryFile), see #1630.\n" - "'git read-tree --index-output=...' fails with 'fatal: unable to write new index file'." - ), - raises=GitCommandError, - ) @with_rw_repo("0.1.6") def test_head_reset(self, rw_repo): cur_head = rw_repo.head From b12fd4a8ea6a541efb06c45c83617ec25ea7bd8a Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 6 Nov 2023 21:08:24 -0500 Subject: [PATCH 2/6] Let Windows subprocess open or rename onto temp file On Windows, a file created by NamedTemporaryFile cannot be reopened while already open, under most circumstances. This applies to attempts to open it both from the original process and from other processes. This differs from the behavior on other operating systems, and it works this way to help ensure the file can be automatically deleted (since having a file open on Windows usually prevents the file from being deleted, unlike on other OSes). However, this can cause problems when NamedTemporaryFile is used to produce a safe filename for accessing with an external process, as IndexFile.from_tree does. On Windows, git subprocess can't open and write to the file. This breaks IndexFile.from_tree on Windows. This is the cause of #1630 -- IndexFile.reset uses IndexFile.from_tree, and the Repo.index property returns an IndexFile instance, so Repo.index.reset fails -- and some other breakages. While passing delete=False to NamedTemporaryFile (and deleting separately) often overcomes that, it won't fix things here, because IndexFile.from_tree uses "git read-tree --index-output=". That needs more than the ability to open the file for write. It needs to be able to create or overwrite the given path by renaming an existing file to it. The description of "--index-output=" in git-read-tree(1) notes: The file must allow to be rename(2)ed into from a temporary file that is created next to the usual index file; typically this means it needs to be on the same filesystem as the index file itself, and you need write permission to the directories the index file and index output file are located in. On Windows, more is required: there must be no currently open file with that path, because on Windows, open files typically cannot be replaced, just as, on Windows, they typically cannot be deleted (nor renamed). This is to say that passing delete=False to NamedTemporaryFile isn't enough to solve this because it merely causes NamedTemporaryFile to behave like the lower-level mkstemp function, leaving the responsibility of deletion to the caller but still *opening* the file -- and while the file is open, the git subprocess can't replace it. Switching to mkstemp, with no further change, would likewise not solve this. This commit is an initial fix for the problem, but not the best fix. On Windows, it closes the temporary file created by NamedTemporaryFile -- so the file can be opened by name, including by a git subprocess, and also overwritten, include by a file move. As currently implemented, this is not ideal, as it creates a race condition, because closing the file actually deletes it. During the time the file does not exist, the filename may end up being reused by another call to NamedTemporaryFile, mkstemp, or similar facility (in any process, written in any language) before the git subprocess can recreate it. Passing delete=False would avoid this, but then deletion would have to be handled separately, and at that point it is not obvious that NamedTemporaryFile is the best facility to use. This fix, though not yet adequate, confirms the impact on #1630. The following 8 tests go from failing to passing due to this change on a local Windows development machine (as listed in the summary at the end of pytest output): - test/test_docs.py:210 Tutorials.test_references_and_objects - test/test_fun.py:37 TestFun.test_aggressive_tree_merge - test/test_index.py:781 TestIndex.test_compare_write_tree - test/test_index.py:294 TestIndex.test_index_file_diffing - test/test_index.py:182 TestIndex.test_index_file_from_tree - test/test_index.py:232 TestIndex.test_index_merge_tree - test/test_index.py:428 TestIndex.test_index_mutation - test/test_refs.py:218 TestRefs.test_head_reset On CI, one test still fails, but later and for an unrelated reason: - test/test_index.py:428 TestIndex.test_index_mutation That `open(fake_symlink_path, "rt")` call raises FileNotFoundError. --- git/index/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/git/index/base.py b/git/index/base.py index 94437ac88..aa9bfe994 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -362,6 +362,8 @@ def from_tree(cls, repo: "Repo", *treeish: Treeish, **kwargs: Any) -> "IndexFile # works - /tmp/ dirs could be on another device. with ExitStack() as stack: tmp_index = stack.enter_context(tempfile.NamedTemporaryFile(dir=repo.git_dir)) + if os.name == "nt": + tmp_index.close() arg_list.append("--index-output=%s" % tmp_index.name) arg_list.extend(treeish) From 12bbacee90f3810367a2ce986f42c3172c598c58 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Thu, 30 Nov 2023 14:38:10 -0500 Subject: [PATCH 3/6] Add xfail mark for new test_index_mutation failure The test assumes symlinks are never created on Windows. This often turns out to be correct, because core.symlinks defaults to false on Windows. (On some Windows systems, creating them is a privileged operation; this can be reconfigured, and unprivileged creation of symlinks is often enabled on systems used for development.) However, on a Windows system with core.symlinks set to true, git will create symlinks if it can, when checking out entries committed to a repository as symlinks. GitHub Actions runners for Windows do this ever since https://github.com/actions/runner-images/pull/1186 (the file is now at images/windows/scripts/build/Install-Git.ps1; the `/o:EnableSymlinks=Enabled` option continues to be passed, causing Git for Windows to be installed with core.symlinks set to true in the system scope). For now, this adds an xfail marking to test_index_mutation, for the FileNotFoundError raised when a symlink, which is expected not to be a symlink, is passed to `open`, causing an attempt to open its nonexistent target. (The check itself might bear refinement: as written, it reads the core.symlinks variable from any scope, including the local scope, which at the time of the check will usually be the cloned GitPython directory, where pytest is run.) While adding an import, this commit also improves the grouping and sorting of existing ones. --- test/test_index.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/test/test_index.py b/test/test_index.py index 5bf34757a..2f97f0af8 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -17,17 +17,21 @@ from sumtypes import constructor, sumtype from git import ( + BlobFilter, + Diff, + Git, IndexFile, + Object, Repo, - BlobFilter, - UnmergedEntriesError, Tree, - Object, - Diff, - GitCommandError, +) +from git.exc import ( CheckoutError, + GitCommandError, + HookExecutionError, + InvalidGitRepositoryError, + UnmergedEntriesError, ) -from git.exc import HookExecutionError, InvalidGitRepositoryError from git.index.fun import hook_path from git.index.typ import BaseIndexEntry, IndexEntry from git.objects import Blob @@ -530,6 +534,11 @@ def _count_existing(self, repo, files): # END num existing helper + @pytest.mark.xfail( + os.name == "nt" and Git().config("core.symlinks") == "true", + reason="Assumes symlinks are not created on Windows and opens a symlink to a nonexistent target.", + raises=FileNotFoundError, + ) @with_rw_repo("0.1.6") def test_index_mutation(self, rw_repo): index = rw_repo.index @@ -740,7 +749,7 @@ def mixed_iterator(): # END for each target # END real symlink test - # Add fake symlink and assure it checks-our as symlink. + # Add fake symlink and assure it checks out as a symlink. fake_symlink_relapath = "my_fake_symlink" link_target = "/etc/that" fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) @@ -774,7 +783,7 @@ def mixed_iterator(): os.remove(fake_symlink_path) index.checkout(fake_symlink_path) - # On Windows, we will never get symlinks. + # On Windows, we currently assume we will never get symlinks. if os.name == "nt": # Symlinks should contain the link as text (which is what a # symlink actually is). From 928ca1e9c39ffe3cef9e77cc09efc08dd7d47eee Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 7 Nov 2023 11:25:38 -0500 Subject: [PATCH 4/6] Minor formatting consistency improvement --- git/index/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git/index/base.py b/git/index/base.py index aa9bfe994..4fa0714d8 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -97,9 +97,8 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): - """ - An Index that can be manipulated using a native implementation in order to save git - command function calls wherever possible. + """An Index that can be manipulated using a native implementation in order to save + git command function calls wherever possible. This provides custom merging facilities allowing to merge without actually changing your index or your working tree. This way you can perform own test-merges based @@ -380,6 +379,7 @@ def from_tree(cls, repo: "Repo", *treeish: Treeish, **kwargs: Any) -> "IndexFile # END index merge handling # UTILITIES + @unbare_repo def _iter_expand_paths(self: "IndexFile", paths: Sequence[PathLike]) -> Iterator[PathLike]: """Expand the directories in list of paths to the corresponding paths accordingly. From 9e5d0aaa0542c0e861dbbb33d48225f4745f56a5 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 7 Nov 2023 16:54:44 -0500 Subject: [PATCH 5/6] Avoid subprocess-writable temp file race condition This lets the Windows subprocess open or rename onto the temporary file using a more robust approach than in b12fd4a, avoiding the race condition described there, where the filename could be inadvertently reused between deletion and recreation of the file. This creates a context manager helper for the temporary index file used in IndexFile.from_tree, whose implementation differs by operating system: - Delegating straightforwardly to NamedTempoaryFile on POSIX systems where an open file can replaced by having another file renamed to it (just as it can be deleted). - Employing custom logic on Windows, using mkstemp, closing the temporary file without immediately deleting it (so it won't be reused by any process seeking to create a temporary file), and then deleting it on context manager exit. IndexFile.from_tree now calls this helper instead of NamedTemporaryFile. For convenience, the helper provides the path, i.e. the "name", when entered, so tmp_index is now just that path. (At least for now, this helper is implemented as a nonpublic function in the git.index.base module, rather than in the git.index.util module. If it were public in git.index.util like the other facilities there, then some later changes to it, or its later removal, would be a breaking change to GitPython. If it were nonpublic in git.index.util, then this would not be a concern, but it would be unintuitive for it to be accessed from code in the git.index.base module. In the future, one way to address this might be to have one or more nonpublic _util modules with public members. Because it would still be a breaking change to drop existing public util modules, that would be more utility modules in total, so such a change isn't included here just for this one used-once function.) --- git/index/base.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/git/index/base.py b/git/index/base.py index 4fa0714d8..6c6462039 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -6,7 +6,7 @@ """Module containing IndexFile, an Index implementation facilitating all kinds of index manipulations such as querying and merging.""" -from contextlib import ExitStack +import contextlib import datetime import glob from io import BytesIO @@ -67,6 +67,7 @@ BinaryIO, Callable, Dict, + Generator, IO, Iterable, Iterator, @@ -96,6 +97,27 @@ __all__ = ("IndexFile", "CheckoutError", "StageType") +@contextlib.contextmanager +def _named_temporary_file_for_subprocess(directory: PathLike) -> Generator[str, None, None]: + """Create a named temporary file git subprocesses can open, deleting it afterward. + + :param directory: The directory in which the file is created. + + :return: A context manager object that creates the file and provides its name on + entry, and deletes it on exit. + """ + if os.name == "nt": + fd, name = tempfile.mkstemp(dir=directory) + os.close(fd) + try: + yield name + finally: + os.remove(name) + else: + with tempfile.NamedTemporaryFile(dir=directory) as ctx: + yield ctx.name + + class IndexFile(LazyMixin, git_diff.Diffable, Serializable): """An Index that can be manipulated using a native implementation in order to save git command function calls wherever possible. @@ -359,11 +381,9 @@ def from_tree(cls, repo: "Repo", *treeish: Treeish, **kwargs: Any) -> "IndexFile # tmp file created in git home directory to be sure renaming # works - /tmp/ dirs could be on another device. - with ExitStack() as stack: - tmp_index = stack.enter_context(tempfile.NamedTemporaryFile(dir=repo.git_dir)) - if os.name == "nt": - tmp_index.close() - arg_list.append("--index-output=%s" % tmp_index.name) + with contextlib.ExitStack() as stack: + tmp_index = stack.enter_context(_named_temporary_file_for_subprocess(repo.git_dir)) + arg_list.append("--index-output=%s" % tmp_index) arg_list.extend(treeish) # Move current index out of the way - otherwise the merge may fail @@ -373,7 +393,7 @@ def from_tree(cls, repo: "Repo", *treeish: Treeish, **kwargs: Any) -> "IndexFile stack.enter_context(TemporaryFileSwap(join_path_native(repo.git_dir, "index"))) repo.git.read_tree(*arg_list, **kwargs) - index = cls(repo, tmp_index.name) + index = cls(repo, tmp_index) index.entries # Force it to read the file as we will delete the temp-file. return index # END index merge handling From 782c06293f96b4a0b92f4a156a12cc506ea4eaf7 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 29 Nov 2023 00:53:37 -0500 Subject: [PATCH 6/6] Add myself to AUTHORS --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3e99ff785..3b97c9473 100644 --- a/AUTHORS +++ b/AUTHORS @@ -52,5 +52,6 @@ Contributors are: -Joseph Hale -Santos Gallegos -Wenhan Zhu +-Eliah Kagan Portions derived from other open source works and are clearly marked.