From 935aa452359ac3f79febefcdb4387b962cf528af Mon Sep 17 00:00:00 2001 From: Charles Machalow Date: Tue, 4 Apr 2023 15:24:13 -0700 Subject: [PATCH] GH-75586: Make shutil.which() on Windows more consistent with the OS (GH-103179) --- Doc/library/shutil.rst | 34 +++++-- Doc/whatsnew/3.12.rst | 14 +++ Lib/shutil.py | 89 ++++++++++--------- Lib/test/test_shutil.py | 86 ++++++++++++++++-- ...3-04-02-22-04-26.gh-issue-75586.526iJm.rst | 1 + Modules/_winapi.c | 21 +++++ Modules/clinic/_winapi.c.h | 42 ++++++++- 7 files changed, 233 insertions(+), 54 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-04-02-22-04-26.gh-issue-75586.526iJm.rst diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index acba66258fe8f0..373cc7d6072031 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -433,23 +433,43 @@ Directory and files operations When no *path* is specified, the results of :func:`os.environ` are used, returning either the "PATH" value or a fallback of :attr:`os.defpath`. - On Windows, the current directory is always prepended to the *path* whether - or not you use the default or provide your own, which is the behavior the - command shell uses when finding executables. Additionally, when finding the - *cmd* in the *path*, the ``PATHEXT`` environment variable is checked. For - example, if you call ``shutil.which("python")``, :func:`which` will search - ``PATHEXT`` to know that it should look for ``python.exe`` within the *path* - directories. For example, on Windows:: + On Windows, the current directory is prepended to the *path* if *mode* does + not include ``os.X_OK``. When the *mode* does include ``os.X_OK``, the + Windows API ``NeedCurrentDirectoryForExePathW`` will be consulted to + determine if the current directory should be prepended to *path*. To avoid + consulting the current working directory for executables: set the environment + variable ``NoDefaultCurrentDirectoryInExePath``. + + Also on Windows, the ``PATHEXT`` variable is used to resolve commands + that may not already include an extension. For example, if you call + ``shutil.which("python")``, :func:`which` will search ``PATHEXT`` + to know that it should look for ``python.exe`` within the *path* + directories. For example, on Windows:: >>> shutil.which("python") 'C:\\Python33\\python.EXE' + This is also applied when *cmd* is a path that contains a directory + component:: + + >> shutil.which("C:\\Python33\\python") + 'C:\\Python33\\python.EXE' + .. versionadded:: 3.3 .. versionchanged:: 3.8 The :class:`bytes` type is now accepted. If *cmd* type is :class:`bytes`, the result type is also :class:`bytes`. + .. versionchanged:: 3.12 + On Windows, the current directory is no longer prepended to the search + path if *mode* includes ``os.X_OK`` and WinAPI + ``NeedCurrentDirectoryForExePathW(cmd)`` is false, else the current + directory is prepended even if it is already in the search path; + ``PATHEXT`` is used now even when *cmd* includes a directory component + or ends with an extension that is in ``PATHEXT``; and filenames that + have no extension can now be found. + .. exception:: Error This exception collects exceptions that are raised during a multi-file diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 88b99828f0beeb..cc2c0560a57c85 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -343,6 +343,20 @@ shutil will be removed in Python 3.14. (Contributed by Irit Katriel in :gh:`102828`.) +* :func:`shutil.which` now consults the *PATHEXT* environment variable to + find matches within *PATH* on Windows even when the given *cmd* includes + a directory component. + (Contributed by Charles Machalow in :gh:`103179`.) + + :func:`shutil.which` will call ``NeedCurrentDirectoryForExePathW`` when + querying for executables on Windows to determine if the current working + directory should be prepended to the search path. + (Contributed by Charles Machalow in :gh:`103179`.) + + :func:`shutil.which` will return a path matching the *cmd* with a component + from ``PATHEXT`` prior to a direct match elsewhere in the search path on + Windows. + (Contributed by Charles Machalow in :gh:`103179`.) sqlite3 ------- diff --git a/Lib/shutil.py b/Lib/shutil.py index b0576407e02ffb..8b378645a5a375 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -40,6 +40,9 @@ elif _WINDOWS: import nt +if sys.platform == 'win32': + import _winapi + COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 64 * 1024 # This should never be removed, see rationale in: # https://bugs.python.org/issue43743#msg393429 @@ -1449,6 +1452,16 @@ def _access_check(fn, mode): and not os.path.isdir(fn)) +def _win_path_needs_curdir(cmd, mode): + """ + On Windows, we can use NeedCurrentDirectoryForExePath to figure out + if we should add the cwd to PATH when searching for executables if + the mode is executable. + """ + return (not (mode & os.X_OK)) or _winapi.NeedCurrentDirectoryForExePath( + os.fsdecode(cmd)) + + def which(cmd, mode=os.F_OK | os.X_OK, path=None): """Given a command, mode, and a PATH string, return the path which conforms to the given mode on the PATH, or None if there is no such @@ -1459,60 +1472,54 @@ def which(cmd, mode=os.F_OK | os.X_OK, path=None): path. """ - # If we're given a path with a directory part, look it up directly rather - # than referring to PATH directories. This includes checking relative to the - # current directory, e.g. ./script - if os.path.dirname(cmd): - if _access_check(cmd, mode): - return cmd - return None - use_bytes = isinstance(cmd, bytes) - if path is None: - path = os.environ.get("PATH", None) - if path is None: - try: - path = os.confstr("CS_PATH") - except (AttributeError, ValueError): - # os.confstr() or CS_PATH is not available - path = os.defpath - # bpo-35755: Don't use os.defpath if the PATH environment variable is - # set to an empty string - - # PATH='' doesn't match, whereas PATH=':' looks in the current directory - if not path: - return None - - if use_bytes: - path = os.fsencode(path) - path = path.split(os.fsencode(os.pathsep)) + # If we're given a path with a directory part, look it up directly rather + # than referring to PATH directories. This includes checking relative to + # the current directory, e.g. ./script + dirname, cmd = os.path.split(cmd) + if dirname: + path = [dirname] else: - path = os.fsdecode(path) - path = path.split(os.pathsep) + if path is None: + path = os.environ.get("PATH", None) + if path is None: + try: + path = os.confstr("CS_PATH") + except (AttributeError, ValueError): + # os.confstr() or CS_PATH is not available + path = os.defpath + # bpo-35755: Don't use os.defpath if the PATH environment variable + # is set to an empty string + + # PATH='' doesn't match, whereas PATH=':' looks in the current + # directory + if not path: + return None - if sys.platform == "win32": - # The current directory takes precedence on Windows. - curdir = os.curdir if use_bytes: - curdir = os.fsencode(curdir) - if curdir not in path: + path = os.fsencode(path) + path = path.split(os.fsencode(os.pathsep)) + else: + path = os.fsdecode(path) + path = path.split(os.pathsep) + + if sys.platform == "win32" and _win_path_needs_curdir(cmd, mode): + curdir = os.curdir + if use_bytes: + curdir = os.fsencode(curdir) path.insert(0, curdir) + if sys.platform == "win32": # PATHEXT is necessary to check on Windows. pathext_source = os.getenv("PATHEXT") or _WIN_DEFAULT_PATHEXT pathext = [ext for ext in pathext_source.split(os.pathsep) if ext] if use_bytes: pathext = [os.fsencode(ext) for ext in pathext] - # See if the given file matches any of the expected path extensions. - # This will allow us to short circuit when given "python.exe". - # If it does match, only test that one, otherwise we have to try - # others. - if any(cmd.lower().endswith(ext.lower()) for ext in pathext): - files = [cmd] - else: - files = [cmd + ext for ext in pathext] + + # Always try checking the originally given cmd, if it doesn't match, try pathext + files = [cmd] + [cmd + ext for ext in pathext] else: # On other platforms you don't have things like PATHEXT to tell you # what file suffixes are executable, so just pass on cmd as-is. diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 1c0589ced9ea89..9eaf167a9fa3c9 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -2034,18 +2034,68 @@ def test_relative_cmd(self): rv = shutil.which(relpath, path=base_dir) self.assertIsNone(rv) - def test_cwd(self): + @unittest.skipUnless(sys.platform != "win32", + "test is for non win32") + def test_cwd_non_win32(self): # Issue #16957 base_dir = os.path.dirname(self.dir) with os_helper.change_cwd(path=self.dir): rv = shutil.which(self.file, path=base_dir) - if sys.platform == "win32": - # Windows: current directory implicitly on PATH + # non-win32: shouldn't match in the current directory. + self.assertIsNone(rv) + + @unittest.skipUnless(sys.platform == "win32", + "test is for win32") + def test_cwd_win32(self): + base_dir = os.path.dirname(self.dir) + with os_helper.change_cwd(path=self.dir): + with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=True): + rv = shutil.which(self.file, path=base_dir) + # Current directory implicitly on PATH self.assertEqual(rv, os.path.join(self.curdir, self.file)) - else: - # Other platforms: shouldn't match in the current directory. + with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=False): + rv = shutil.which(self.file, path=base_dir) + # Current directory not on PATH self.assertIsNone(rv) + @unittest.skipUnless(sys.platform == "win32", + "test is for win32") + def test_cwd_win32_added_before_all_other_path(self): + base_dir = pathlib.Path(os.fsdecode(self.dir)) + + elsewhere_in_path_dir = base_dir / 'dir1' + elsewhere_in_path_dir.mkdir() + match_elsewhere_in_path = elsewhere_in_path_dir / 'hello.exe' + match_elsewhere_in_path.touch() + + exe_in_cwd = base_dir / 'hello.exe' + exe_in_cwd.touch() + + with os_helper.change_cwd(path=base_dir): + with unittest.mock.patch('shutil._win_path_needs_curdir', return_value=True): + rv = shutil.which('hello.exe', path=elsewhere_in_path_dir) + + self.assertEqual(os.path.abspath(rv), os.path.abspath(exe_in_cwd)) + + @unittest.skipUnless(sys.platform == "win32", + "test is for win32") + def test_pathext_match_before_path_full_match(self): + base_dir = pathlib.Path(os.fsdecode(self.dir)) + dir1 = base_dir / 'dir1' + dir2 = base_dir / 'dir2' + dir1.mkdir() + dir2.mkdir() + + pathext_match = dir1 / 'hello.com.exe' + path_match = dir2 / 'hello.com' + pathext_match.touch() + path_match.touch() + + test_path = os.pathsep.join([str(dir1), str(dir2)]) + assert os.path.basename(shutil.which( + 'hello.com', path=test_path, mode = os.F_OK + )).lower() == 'hello.com.exe' + @os_helper.skip_if_dac_override def test_non_matching_mode(self): # Set the file read-only and ask for writeable files. @@ -2179,6 +2229,32 @@ def test_pathext_with_empty_str(self): rv = shutil.which(program, path=self.temp_dir) self.assertEqual(rv, temp_filexyz.name) + # See GH-75586 + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_pathext_applied_on_files_in_path(self): + with os_helper.EnvironmentVarGuard() as env: + env["PATH"] = self.temp_dir + env["PATHEXT"] = ".test" + + test_path = pathlib.Path(self.temp_dir) / "test_program.test" + test_path.touch(mode=0o755) + + self.assertEqual(shutil.which("test_program"), str(test_path)) + + # See GH-75586 + @unittest.skipUnless(sys.platform == "win32", 'test specific to Windows') + def test_win_path_needs_curdir(self): + with unittest.mock.patch('_winapi.NeedCurrentDirectoryForExePath', return_value=True) as need_curdir_mock: + self.assertTrue(shutil._win_path_needs_curdir('dontcare', os.X_OK)) + need_curdir_mock.assert_called_once_with('dontcare') + need_curdir_mock.reset_mock() + self.assertTrue(shutil._win_path_needs_curdir('dontcare', 0)) + need_curdir_mock.assert_not_called() + + with unittest.mock.patch('_winapi.NeedCurrentDirectoryForExePath', return_value=False) as need_curdir_mock: + self.assertFalse(shutil._win_path_needs_curdir('dontcare', os.X_OK)) + need_curdir_mock.assert_called_once_with('dontcare') + class TestWhichBytes(TestWhich): def setUp(self): diff --git a/Misc/NEWS.d/next/Library/2023-04-02-22-04-26.gh-issue-75586.526iJm.rst b/Misc/NEWS.d/next/Library/2023-04-02-22-04-26.gh-issue-75586.526iJm.rst new file mode 100644 index 00000000000000..8ec568ec4e4775 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-04-02-22-04-26.gh-issue-75586.526iJm.rst @@ -0,0 +1 @@ +Fix various Windows-specific issues with ``shutil.which``. diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 83cde7501176b6..fa380b8b798405 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -2054,6 +2054,26 @@ _winapi__mimetypes_read_windows_registry_impl(PyObject *module, #undef CB_TYPE } +/*[clinic input] +_winapi.NeedCurrentDirectoryForExePath -> bool + + exe_name: LPCWSTR + / +[clinic start generated code]*/ + +static int +_winapi_NeedCurrentDirectoryForExePath_impl(PyObject *module, + LPCWSTR exe_name) +/*[clinic end generated code: output=a65ec879502b58fc input=972aac88a1ec2f00]*/ +{ + BOOL result; + + Py_BEGIN_ALLOW_THREADS + result = NeedCurrentDirectoryForExePathW(exe_name); + Py_END_ALLOW_THREADS + + return result; +} static PyMethodDef winapi_functions[] = { _WINAPI_CLOSEHANDLE_METHODDEF @@ -2089,6 +2109,7 @@ static PyMethodDef winapi_functions[] = { _WINAPI_GETACP_METHODDEF _WINAPI_GETFILETYPE_METHODDEF _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF + _WINAPI_NEEDCURRENTDIRECTORYFOREXEPATH_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 891b3f851d1243..7bc63e612be348 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -1371,4 +1371,44 @@ _winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args exit: return return_value; } -/*[clinic end generated code: output=edb1a9d1bbfd6394 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_winapi_NeedCurrentDirectoryForExePath__doc__, +"NeedCurrentDirectoryForExePath($module, exe_name, /)\n" +"--\n" +"\n"); + +#define _WINAPI_NEEDCURRENTDIRECTORYFOREXEPATH_METHODDEF \ + {"NeedCurrentDirectoryForExePath", (PyCFunction)_winapi_NeedCurrentDirectoryForExePath, METH_O, _winapi_NeedCurrentDirectoryForExePath__doc__}, + +static int +_winapi_NeedCurrentDirectoryForExePath_impl(PyObject *module, + LPCWSTR exe_name); + +static PyObject * +_winapi_NeedCurrentDirectoryForExePath(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + LPCWSTR exe_name = NULL; + int _return_value; + + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("NeedCurrentDirectoryForExePath", "argument", "str", arg); + goto exit; + } + exe_name = PyUnicode_AsWideCharString(arg, NULL); + if (exe_name == NULL) { + goto exit; + } + _return_value = _winapi_NeedCurrentDirectoryForExePath_impl(module, exe_name); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + /* Cleanup for exe_name */ + PyMem_Free((void *)exe_name); + + return return_value; +} +/*[clinic end generated code: output=96ea65ece7912d0a input=a9049054013a1b77]*/