From 0f175766e27642108c65bba04bbd54dcf8799b0e Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 16 Mar 2023 17:27:21 +0000 Subject: [PATCH] gh-99726: Improves correctness of stat results for Windows, and uses faster API when available (GH-102149) This deprecates `st_ctime` fields on Windows, with the intent to change them to contain the correct value in 3.14. For now, they should keep returning the creation time as they always have. --- Doc/library/os.rst | 88 +++++--- Doc/whatsnew/3.12.rst | 16 ++ Include/internal/pycore_fileutils.h | 5 +- Include/internal/pycore_fileutils_windows.h | 80 ++++++++ Lib/test/test_os.py | 12 +- ...3-02-22-17-26-10.gh-issue-99726.76t957.rst | 2 + Modules/posixmodule.c | 191 +++++++++++++----- PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 + Python/fileutils.c | 130 ++++++++++-- 10 files changed, 446 insertions(+), 82 deletions(-) create mode 100644 Include/internal/pycore_fileutils_windows.h create mode 100644 Misc/NEWS.d/next/Windows/2023-02-22-17-26-10.gh-issue-99726.76t957.rst diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 5b9f49be1fad55..3153f79e10ce1f 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -2858,6 +2858,12 @@ features: Added support for the :class:`~os.PathLike` interface. Added support for :class:`bytes` paths on Windows. + .. versionchanged:: 3.12 + The ``st_ctime`` attribute of a stat result is deprecated on Windows. + The file creation time is properly available as ``st_birthtime``, and + in the future ``st_ctime`` may be changed to return zero or the + metadata change time, if available. + .. function:: stat(path, *, dir_fd=None, follow_symlinks=True) @@ -2973,10 +2979,12 @@ features: .. attribute:: st_ctime - Platform dependent: + Time of most recent metadata change expressed in seconds. - * the time of most recent metadata change on Unix, - * the time of creation on Windows, expressed in seconds. + .. versionchanged:: 3.12 + ``st_ctime`` is deprecated on Windows. Use ``st_birthtime`` for + the file creation time. In the future, ``st_ctime`` will contain + the time of the most recent metadata change, as for other platforms. .. attribute:: st_atime_ns @@ -2989,29 +2997,48 @@ features: .. attribute:: st_ctime_ns - Platform dependent: + Time of most recent metadata change expressed in nanoseconds as an + integer. + + .. versionchanged:: 3.12 + ``st_ctime_ns`` is deprecated on Windows. Use ``st_birthtime_ns`` + for the file creation time. In the future, ``st_ctime`` will contain + the time of the most recent metadata change, as for other platforms. + + .. attribute:: st_birthtime + + Time of file creation expressed in seconds. This attribute is not + always available, and may raise :exc:`AttributeError`. + + .. versionchanged:: 3.12 + ``st_birthtime`` is now available on Windows. + + .. attribute:: st_birthtime_ns - * the time of most recent metadata change on Unix, - * the time of creation on Windows, expressed in nanoseconds as an - integer. + Time of file creation expressed in nanoseconds as an integer. + This attribute is not always available, and may raise + :exc:`AttributeError`. + + .. versionadded:: 3.12 .. note:: The exact meaning and resolution of the :attr:`st_atime`, - :attr:`st_mtime`, and :attr:`st_ctime` attributes depend on the operating - system and the file system. For example, on Windows systems using the FAT - or FAT32 file systems, :attr:`st_mtime` has 2-second resolution, and - :attr:`st_atime` has only 1-day resolution. See your operating system - documentation for details. + :attr:`st_mtime`, :attr:`st_ctime` and :attr:`st_birthtime` attributes + depend on the operating system and the file system. For example, on + Windows systems using the FAT32 file systems, :attr:`st_mtime` has + 2-second resolution, and :attr:`st_atime` has only 1-day resolution. + See your operating system documentation for details. Similarly, although :attr:`st_atime_ns`, :attr:`st_mtime_ns`, - and :attr:`st_ctime_ns` are always expressed in nanoseconds, many - systems do not provide nanosecond precision. On systems that do - provide nanosecond precision, the floating-point object used to - store :attr:`st_atime`, :attr:`st_mtime`, and :attr:`st_ctime` - cannot preserve all of it, and as such will be slightly inexact. - If you need the exact timestamps you should always use - :attr:`st_atime_ns`, :attr:`st_mtime_ns`, and :attr:`st_ctime_ns`. + :attr:`st_ctime_ns` and :attr:`st_birthtime_ns` are always expressed in + nanoseconds, many systems do not provide nanosecond precision. On + systems that do provide nanosecond precision, the floating-point object + used to store :attr:`st_atime`, :attr:`st_mtime`, :attr:`st_ctime` and + :attr:`st_birthtime` cannot preserve all of it, and as such will be + slightly inexact. If you need the exact timestamps you should always use + :attr:`st_atime_ns`, :attr:`st_mtime_ns`, :attr:`st_ctime_ns` and + :attr:`st_birthtime_ns`. On some Unix systems (such as Linux), the following attributes may also be available: @@ -3041,10 +3068,6 @@ features: File generation number. - .. attribute:: st_birthtime - - Time of file creation. - On Solaris and derivatives, the following attributes may also be available: @@ -3117,6 +3140,25 @@ features: files as :const:`S_IFCHR`, :const:`S_IFIFO` or :const:`S_IFBLK` as appropriate. + .. versionchanged:: 3.12 + On Windows, :attr:`st_ctime` is deprecated. Eventually, it will + contain the last metadata change time, for consistency with other + platforms, but for now still contains creation time. + Use :attr:`st_birthtime` for the creation time. + + .. versionchanged:: 3.12 + On Windows, :attr:`st_ino` may now be up to 128 bits, depending + on the file system. Previously it would not be above 64 bits, and + larger file identifiers would be arbitrarily packed. + + .. versionchanged:: 3.12 + On Windows, :attr:`st_rdev` no longer returns a value. Previously + it would contain the same as :attr:`st_dev`, which was incorrect. + + .. versionadded:: 3.12 + Added the :attr:`st_birthtime` member on Windows. + + .. function:: statvfs(path) Perform a :c:func:`statvfs` system call on the given path. The return value is diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index b9c668543e1b73..03b1f975746787 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -306,6 +306,16 @@ os functions on Windows for enumerating drives, volumes and mount points. (Contributed by Steve Dower in :gh:`102519`.) +* :func:`os.stat` and :func:`os.lstat` are now more accurate on Windows. + The ``st_birthtime`` field will now be filled with the creation time + of the file, and ``st_ctime`` is deprecated but still contains the + creation time (but in the future will return the last metadata change, + for consistency with other platforms). ``st_dev`` may be up to 64 bits + and ``st_ino`` up to 128 bits depending on your file system, and + ``st_rdev`` is always set to zero rather than incorrect values. + Both functions may be significantly faster on newer releases of + Windows. (Contributed by Steve Dower in :gh:`99726`.) + os.path ------- @@ -469,6 +479,12 @@ Deprecated warning at compile time. This field will be removed in Python 3.14. (Contributed by Ramvikrams and Kumar Aditya in :gh:`101193`. PEP by Ken Jin.) +* The ``st_ctime`` fields return by :func:`os.stat` and :func:`os.lstat` on + Windows are deprecated. In a future release, they will contain the last + metadata change time, consistent with other platforms. For now, they still + contain the creation time, which is also available in the new ``st_birthtime`` + field. (Contributed by Steve Dower in :gh:`99726`.) + Pending Removal in Python 3.13 ------------------------------ diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 445ac0a3d955d9..ef6642d00f1b54 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -66,7 +66,7 @@ PyAPI_FUNC(PyObject *) _Py_device_encoding(int); #ifdef MS_WINDOWS struct _Py_stat_struct { - unsigned long st_dev; + uint64_t st_dev; uint64_t st_ino; unsigned short st_mode; int st_nlink; @@ -80,8 +80,11 @@ struct _Py_stat_struct { int st_mtime_nsec; time_t st_ctime; int st_ctime_nsec; + time_t st_birthtime; + int st_birthtime_nsec; unsigned long st_file_attributes; unsigned long st_reparse_tag; + uint64_t st_ino_high; }; #else # define _Py_stat_struct stat diff --git a/Include/internal/pycore_fileutils_windows.h b/Include/internal/pycore_fileutils_windows.h new file mode 100644 index 00000000000000..44874903b092f3 --- /dev/null +++ b/Include/internal/pycore_fileutils_windows.h @@ -0,0 +1,80 @@ +#ifndef Py_INTERNAL_FILEUTILS_WINDOWS_H +#define Py_INTERNAL_FILEUTILS_WINDOWS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "Py_BUILD_CORE must be defined to include this header" +#endif + +#ifdef MS_WINDOWS + +#if !defined(NTDDI_WIN10_NI) || !(NTDDI_VERSION >= NTDDI_WIN10_NI) +typedef struct _FILE_STAT_BASIC_INFORMATION { + LARGE_INTEGER FileId; + LARGE_INTEGER CreationTime; + LARGE_INTEGER LastAccessTime; + LARGE_INTEGER LastWriteTime; + LARGE_INTEGER ChangeTime; + LARGE_INTEGER AllocationSize; + LARGE_INTEGER EndOfFile; + ULONG FileAttributes; + ULONG ReparseTag; + ULONG NumberOfLinks; + ULONG DeviceType; + ULONG DeviceCharacteristics; + ULONG Reserved; + FILE_ID_128 FileId128; + LARGE_INTEGER VolumeSerialNumber; +} FILE_STAT_BASIC_INFORMATION; + +typedef enum _FILE_INFO_BY_NAME_CLASS { + FileStatByNameInfo, + FileStatLxByNameInfo, + FileCaseSensitiveByNameInfo, + FileStatBasicByNameInfo, + MaximumFileInfoByNameClass +} FILE_INFO_BY_NAME_CLASS; +#endif + +typedef BOOL (WINAPI *PGetFileInformationByName)( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +); + +static inline BOOL _Py_GetFileInformationByName( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +) { + static PGetFileInformationByName GetFileInformationByName = NULL; + static int GetFileInformationByName_init = -1; + + if (GetFileInformationByName_init < 0) { + HMODULE hMod = LoadLibraryW(L"api-ms-win-core-file-l2-1-4"); + GetFileInformationByName_init = 0; + if (hMod) { + GetFileInformationByName = (PGetFileInformationByName)GetProcAddress( + hMod, "GetFileInformationByName"); + if (GetFileInformationByName) { + GetFileInformationByName_init = 1; + } else { + FreeLibrary(hMod); + } + } + } + + if (GetFileInformationByName_init <= 0) { + SetLastError(ERROR_NOT_SUPPORTED); + return FALSE; + } + return GetFileInformationByName(FileName, FileInformationClass, FileInfoBuffer, FileInfoBufferSize); +} + +#endif + +#endif diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 42357fef80ec89..74ece3ffb4ed17 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -556,6 +556,15 @@ def trunc(x): return x nanosecondy = getattr(result, name + "_ns") // 10000 self.assertAlmostEqual(floaty, nanosecondy, delta=2) + # Ensure both birthtime and birthtime_ns roughly agree, if present + try: + floaty = int(result.st_birthtime * 100000) + nanosecondy = result.st_birthtime_ns // 10000 + except AttributeError: + pass + else: + self.assertAlmostEqual(floaty, nanosecondy, delta=2) + try: result[200] self.fail("No exception raised") @@ -4234,7 +4243,8 @@ def assert_stat_equal(self, stat1, stat2, skip_fields): for attr in dir(stat1): if not attr.startswith("st_"): continue - if attr in ("st_dev", "st_ino", "st_nlink"): + if attr in ("st_dev", "st_ino", "st_nlink", "st_ctime", + "st_ctime_ns"): continue self.assertEqual(getattr(stat1, attr), getattr(stat2, attr), diff --git a/Misc/NEWS.d/next/Windows/2023-02-22-17-26-10.gh-issue-99726.76t957.rst b/Misc/NEWS.d/next/Windows/2023-02-22-17-26-10.gh-issue-99726.76t957.rst new file mode 100644 index 00000000000000..e2578620017894 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2023-02-22-17-26-10.gh-issue-99726.76t957.rst @@ -0,0 +1,2 @@ +Improves correctness of stat results for Windows, and uses faster API when +available diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 7d91f7e4bac76b..e38caf7cc0abee 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -41,6 +41,7 @@ #ifndef MS_WINDOWS # include "posixmodule.h" #else +# include "pycore_fileutils_windows.h" # include "winreparse.h" #endif @@ -668,8 +669,11 @@ PyOS_AfterFork(void) #ifdef MS_WINDOWS /* defined in fileutils.c */ void _Py_time_t_to_FILE_TIME(time_t, int, FILETIME *); -void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *, - ULONG, struct _Py_stat_struct *); +void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *, ULONG, + FILE_BASIC_INFO *, FILE_ID_INFO *, + struct _Py_stat_struct *); +void _Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *, + struct _Py_stat_struct *); #endif @@ -1819,12 +1823,39 @@ attributes_from_dir(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *re return TRUE; } + +static void +update_st_mode_from_path(const wchar_t *path, DWORD attr, + struct _Py_stat_struct *result) +{ + if (!(attr & FILE_ATTRIBUTE_DIRECTORY)) { + /* Fix the file execute permissions. This hack sets S_IEXEC if + the filename has an extension that is commonly used by files + that CreateProcessW can execute. A real implementation calls + GetSecurityInfo, OpenThreadToken/OpenProcessToken, and + AccessCheck to check for generic read, write, and execute + access. */ + const wchar_t *fileExtension = wcsrchr(path, '.'); + if (fileExtension) { + if (_wcsicmp(fileExtension, L".exe") == 0 || + _wcsicmp(fileExtension, L".bat") == 0 || + _wcsicmp(fileExtension, L".cmd") == 0 || + _wcsicmp(fileExtension, L".com") == 0) { + result->st_mode |= 0111; + } + } + } +} + + static int -win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, - BOOL traverse) +win32_xstat_slow_impl(const wchar_t *path, struct _Py_stat_struct *result, + BOOL traverse) { HANDLE hFile; BY_HANDLE_FILE_INFORMATION fileInfo; + FILE_BASIC_INFO basicInfo; + FILE_ID_INFO idInfo; FILE_ATTRIBUTE_TAG_INFO tagInfo = { 0 }; DWORD fileType, error; BOOL isUnhandledTag = FALSE; @@ -1954,12 +1985,16 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, for an unhandled tag. */ } else if (!isUnhandledTag) { CloseHandle(hFile); - return win32_xstat_impl(path, result, TRUE); + return win32_xstat_slow_impl(path, result, TRUE); } } } - if (!GetFileInformationByHandle(hFile, &fileInfo)) { + if (!GetFileInformationByHandle(hFile, &fileInfo) || + !GetFileInformationByHandleEx(hFile, FileBasicInfo, + &basicInfo, sizeof(basicInfo)) || + !GetFileInformationByHandleEx(hFile, FileIdInfo, + &idInfo, sizeof(idInfo))) { switch (GetLastError()) { case ERROR_INVALID_PARAMETER: case ERROR_INVALID_FUNCTION: @@ -1975,25 +2010,8 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, } } - _Py_attribute_data_to_stat(&fileInfo, tagInfo.ReparseTag, result); - - if (!(fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { - /* Fix the file execute permissions. This hack sets S_IEXEC if - the filename has an extension that is commonly used by files - that CreateProcessW can execute. A real implementation calls - GetSecurityInfo, OpenThreadToken/OpenProcessToken, and - AccessCheck to check for generic read, write, and execute - access. */ - const wchar_t *fileExtension = wcsrchr(path, '.'); - if (fileExtension) { - if (_wcsicmp(fileExtension, L".exe") == 0 || - _wcsicmp(fileExtension, L".bat") == 0 || - _wcsicmp(fileExtension, L".cmd") == 0 || - _wcsicmp(fileExtension, L".com") == 0) { - result->st_mode |= 0111; - } - } - } + _Py_attribute_data_to_stat(&fileInfo, tagInfo.ReparseTag, &basicInfo, &idInfo, result); + update_st_mode_from_path(path, fileInfo.dwFileAttributes, result); cleanup: if (hFile != INVALID_HANDLE_VALUE) { @@ -2010,6 +2028,39 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, return retval; } +static int +win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, + BOOL traverse) +{ + FILE_STAT_BASIC_INFORMATION statInfo; + if (_Py_GetFileInformationByName(path, FileStatBasicByNameInfo, + &statInfo, sizeof(statInfo))) { + if (// Cannot use fast path for reparse points ... + !(statInfo.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) + // ... unless it's a name surrogate (symlink) and we're not following + || (!traverse && IsReparseTagNameSurrogate(statInfo.ReparseTag)) + ) { + _Py_stat_basic_info_to_stat(&statInfo, result); + update_st_mode_from_path(path, statInfo.FileAttributes, result); + return 0; + } + } else { + switch(GetLastError()) { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + case ERROR_NOT_READY: + case ERROR_BAD_NET_NAME: + /* These errors aren't worth retrying with the slow path */ + return -1; + case ERROR_NOT_SUPPORTED: + /* indicates the API couldn't be loaded */ + break; + } + } + + return win32_xstat_slow_impl(path, result, traverse); +} + static int win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) { @@ -2017,6 +2068,10 @@ win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) setting it to a POSIX error. Callers should use GetLastError. */ int code = win32_xstat_impl(path, result, traverse); errno = 0; + + /* ctime is only deprecated from 3.12, so we copy birthtime across */ + result->st_ctime = result->st_birthtime; + result->st_ctime_nsec = result->st_birthtime_nsec; return code; } /* About the following functions: win32_lstat_w, win32_stat, win32_stat_w @@ -2087,9 +2142,12 @@ static PyStructSequence_Field stat_result_fields[] = { #ifdef HAVE_STRUCT_STAT_ST_GEN {"st_gen", "generation number"}, #endif -#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME +#if defined(HAVE_STRUCT_STAT_ST_BIRTHTIME) || defined(MS_WINDOWS) {"st_birthtime", "time of creation"}, #endif +#ifdef MS_WINDOWS + {"st_birthtime_ns", "time of creation in nanoseconds"}, +#endif #ifdef HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES {"st_file_attributes", "Windows file attribute bits"}, #endif @@ -2132,16 +2190,22 @@ static PyStructSequence_Field stat_result_fields[] = { #define ST_GEN_IDX ST_FLAGS_IDX #endif -#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME +#if defined(HAVE_STRUCT_STAT_ST_BIRTHTIME) || defined(MS_WINDOWS) #define ST_BIRTHTIME_IDX (ST_GEN_IDX+1) #else #define ST_BIRTHTIME_IDX ST_GEN_IDX #endif -#ifdef HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES -#define ST_FILE_ATTRIBUTES_IDX (ST_BIRTHTIME_IDX+1) +#ifdef MS_WINDOWS +#define ST_BIRTHTIME_NS_IDX (ST_BIRTHTIME_IDX+1) #else -#define ST_FILE_ATTRIBUTES_IDX ST_BIRTHTIME_IDX +#define ST_BIRTHTIME_NS_IDX ST_BIRTHTIME_IDX +#endif + +#if defined(HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES) || defined(MS_WINDOWS) +#define ST_FILE_ATTRIBUTES_IDX (ST_BIRTHTIME_NS_IDX+1) +#else +#define ST_FILE_ATTRIBUTES_IDX ST_BIRTHTIME_NS_IDX #endif #ifdef HAVE_STRUCT_STAT_ST_FSTYPE @@ -2310,7 +2374,7 @@ _posix_free(void *module) } static void -fill_time(PyObject *module, PyObject *v, int index, time_t sec, unsigned long nsec) +fill_time(PyObject *module, PyObject *v, int s_index, int f_index, int ns_index, time_t sec, unsigned long nsec) { PyObject *s = _PyLong_FromTime_t(sec); PyObject *ns_fractional = PyLong_FromUnsignedLong(nsec); @@ -2334,12 +2398,18 @@ fill_time(PyObject *module, PyObject *v, int index, time_t sec, unsigned long ns goto exit; } - PyStructSequence_SET_ITEM(v, index, s); - PyStructSequence_SET_ITEM(v, index+3, float_s); - PyStructSequence_SET_ITEM(v, index+6, ns_total); - s = NULL; - float_s = NULL; - ns_total = NULL; + if (s_index >= 0) { + PyStructSequence_SET_ITEM(v, s_index, s); + s = NULL; + } + if (f_index >= 0) { + PyStructSequence_SET_ITEM(v, f_index, float_s); + float_s = NULL; + } + if (ns_index >= 0) { + PyStructSequence_SET_ITEM(v, ns_index, ns_total); + ns_total = NULL; + } exit: Py_XDECREF(s); Py_XDECREF(ns_fractional); @@ -2348,6 +2418,33 @@ fill_time(PyObject *module, PyObject *v, int index, time_t sec, unsigned long ns Py_XDECREF(float_s); } +#ifdef MS_WINDOWS +static PyObject* +_pystat_l128_from_l64_l64(uint64_t low, uint64_t high) +{ + PyObject *o_low = PyLong_FromUnsignedLongLong(low); + if (!o_low || !high) { + return o_low; + } + PyObject *o_high = PyLong_FromUnsignedLongLong(high); + PyObject *l64 = o_high ? PyLong_FromLong(64) : NULL; + if (!l64) { + Py_XDECREF(o_high); + Py_DECREF(o_low); + return NULL; + } + Py_SETREF(o_high, PyNumber_Lshift(o_high, l64)); + Py_DECREF(l64); + if (!o_high) { + Py_DECREF(o_low); + return NULL; + } + Py_SETREF(o_low, PyNumber_Add(o_low, o_high)); + Py_DECREF(o_high); + return o_low; +} +#endif + /* pack a system stat C structure into the Python stat tuple (used by posix_stat() and posix_fstat()) */ static PyObject* @@ -2360,12 +2457,13 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) return NULL; PyStructSequence_SET_ITEM(v, 0, PyLong_FromLong((long)st->st_mode)); +#ifdef MS_WINDOWS + PyStructSequence_SET_ITEM(v, 1, _pystat_l128_from_l64_l64(st->st_ino, st->st_ino_high)); + PyStructSequence_SET_ITEM(v, 2, PyLong_FromUnsignedLongLong(st->st_dev)); +#else static_assert(sizeof(unsigned long long) >= sizeof(st->st_ino), "stat.st_ino is larger than unsigned long long"); PyStructSequence_SET_ITEM(v, 1, PyLong_FromUnsignedLongLong(st->st_ino)); -#ifdef MS_WINDOWS - PyStructSequence_SET_ITEM(v, 2, PyLong_FromUnsignedLong(st->st_dev)); -#else PyStructSequence_SET_ITEM(v, 2, _PyLong_FromDev(st->st_dev)); #endif PyStructSequence_SET_ITEM(v, 3, PyLong_FromLong((long)st->st_nlink)); @@ -2395,9 +2493,9 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) #else ansec = mnsec = cnsec = 0; #endif - fill_time(module, v, 7, st->st_atime, ansec); - fill_time(module, v, 8, st->st_mtime, mnsec); - fill_time(module, v, 9, st->st_ctime, cnsec); + fill_time(module, v, 7, 10, 13, st->st_atime, ansec); + fill_time(module, v, 8, 11, 14, st->st_mtime, mnsec); + fill_time(module, v, 9, 12, 15, st->st_ctime, cnsec); #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE PyStructSequence_SET_ITEM(v, ST_BLKSIZE_IDX, @@ -2415,7 +2513,7 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) PyStructSequence_SET_ITEM(v, ST_GEN_IDX, PyLong_FromLong((long)st->st_gen)); #endif -#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME +#if defined(HAVE_STRUCT_STAT_ST_BIRTHTIME) { PyObject *val; unsigned long bsec,bnsec; @@ -2429,6 +2527,9 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) PyStructSequence_SET_ITEM(v, ST_BIRTHTIME_IDX, val); } +#elif defined(MS_WINDOWS) + fill_time(module, v, -1, ST_BIRTHTIME_IDX, ST_BIRTHTIME_NS_IDX, + st->st_birthtime, st->st_birthtime_nsec); #endif #ifdef HAVE_STRUCT_STAT_ST_FLAGS PyStructSequence_SET_ITEM(v, ST_FLAGS_IDX, @@ -14639,7 +14740,7 @@ DirEntry_from_find_data(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW) } find_data_to_file_info(dataW, &file_info, &reparse_tag); - _Py_attribute_data_to_stat(&file_info, reparse_tag, &entry->win32_lstat); + _Py_attribute_data_to_stat(&file_info, reparse_tag, NULL, NULL, &entry->win32_lstat); return (PyObject *)entry; diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 85dc8caa458ed9..0343d30a42cd6a 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -216,6 +216,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 98e7d59ba1020c..359463e1d9af75 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -555,6 +555,9 @@ Include\internal + + Include\internal + Include\internal diff --git a/Python/fileutils.c b/Python/fileutils.c index f48b626b444016..969b7163b5ac18 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -8,6 +8,8 @@ #ifdef MS_WINDOWS # include # include +# include // FILE_DEVICE_* constants +# include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION # if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) # define PATHCCH_ALLOW_LONG_PATHS 0x01 # else @@ -1056,6 +1058,13 @@ FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out) *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); } +static void +LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out) +{ + *nsec_out = (int)(in_ptr->QuadPart % 10000000) * 100; /* FILETIME is in units of 100 nsec. */ + *time_out = Py_SAFE_DOWNCAST((in_ptr->QuadPart / 10000000) - secs_between_epochs, __int64, time_t); +} + void _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr) { @@ -1085,33 +1094,126 @@ attributes_to_mode(DWORD attr) return m; } + +typedef union { + FILE_ID_128 id; + struct { + uint64_t st_ino; + uint64_t st_ino_high; + }; +} id_128_to_ino; + + void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, + FILE_BASIC_INFO *basic_info, FILE_ID_INFO *id_info, struct _Py_stat_struct *result) { memset(result, 0, sizeof(*result)); result->st_mode = attributes_to_mode(info->dwFileAttributes); result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow; - result->st_dev = info->dwVolumeSerialNumber; - result->st_rdev = result->st_dev; - FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec); - FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); - FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); + result->st_dev = id_info ? id_info->VolumeSerialNumber : info->dwVolumeSerialNumber; + result->st_rdev = 0; + /* st_ctime is deprecated, but we preserve the legacy value in our caller, not here */ + if (basic_info) { + LARGE_INTEGER_to_time_t_nsec(&basic_info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&basic_info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec); + LARGE_INTEGER_to_time_t_nsec(&basic_info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&basic_info->LastAccessTime, &result->st_atime, &result->st_atime_nsec); + } else { + FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_birthtime, &result->st_birthtime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); + } result->st_nlink = info->nNumberOfLinks; - result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow; + + if (id_info) { + id_128_to_ino file_id; + file_id.id = id_info->FileId; + result->st_ino = file_id.st_ino; + result->st_ino_high = file_id.st_ino_high; + } else { + /* should only occur for DirEntry_from_find_data, in which case the + index is likely to be zero anyway. */ + result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow; + } + /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will open other name surrogate reparse points without traversing them. To detect/handle these, check st_file_attributes and st_reparse_tag. */ result->st_reparse_tag = reparse_tag; if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && reparse_tag == IO_REPARSE_TAG_SYMLINK) { - /* first clear the S_IFMT bits */ - result->st_mode ^= (result->st_mode & S_IFMT); - /* now set the bits that make this a symlink */ - result->st_mode |= S_IFLNK; + /* set the bits that make this a symlink */ + result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK; } result->st_file_attributes = info->dwFileAttributes; } + +void +_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info, + struct _Py_stat_struct *result) +{ + memset(result, 0, sizeof(*result)); + result->st_mode = attributes_to_mode(info->FileAttributes); + result->st_size = info->EndOfFile.QuadPart; + LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec); + result->st_nlink = info->NumberOfLinks; + result->st_dev = info->VolumeSerialNumber.QuadPart; + /* File systems with less than 128-bits zero pad into this field */ + id_128_to_ino file_id; + file_id.id = info->FileId128; + result->st_ino = file_id.st_ino; + result->st_ino_high = file_id.st_ino_high; + /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will + open other name surrogate reparse points without traversing them. To + detect/handle these, check st_file_attributes and st_reparse_tag. */ + result->st_reparse_tag = info->ReparseTag; + if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && + info->ReparseTag == IO_REPARSE_TAG_SYMLINK) { + /* set the bits that make this a symlink */ + result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK; + } + result->st_file_attributes = info->FileAttributes; + switch (info->DeviceType) { + case FILE_DEVICE_DISK: + case FILE_DEVICE_VIRTUAL_DISK: + case FILE_DEVICE_DFS: + case FILE_DEVICE_CD_ROM: + case FILE_DEVICE_CONTROLLER: + case FILE_DEVICE_DATALINK: + break; + case FILE_DEVICE_DISK_FILE_SYSTEM: + case FILE_DEVICE_CD_ROM_FILE_SYSTEM: + case FILE_DEVICE_NETWORK_FILE_SYSTEM: + result->st_mode = (result->st_mode & ~S_IFMT) | 0x6000; /* _S_IFBLK */ + break; + case FILE_DEVICE_CONSOLE: + case FILE_DEVICE_NULL: + case FILE_DEVICE_KEYBOARD: + case FILE_DEVICE_MODEM: + case FILE_DEVICE_MOUSE: + case FILE_DEVICE_PARALLEL_PORT: + case FILE_DEVICE_PRINTER: + case FILE_DEVICE_SCREEN: + case FILE_DEVICE_SERIAL_PORT: + case FILE_DEVICE_SOUND: + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR; + break; + case FILE_DEVICE_NAMED_PIPE: + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO; + break; + default: + if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR; + } + break; + } +} + #endif /* Return information about a file. @@ -1131,6 +1233,8 @@ _Py_fstat_noraise(int fd, struct _Py_stat_struct *status) { #ifdef MS_WINDOWS BY_HANDLE_FILE_INFORMATION info; + FILE_BASIC_INFO basicInfo; + FILE_ID_INFO idInfo; HANDLE h; int type; @@ -1162,14 +1266,16 @@ _Py_fstat_noraise(int fd, struct _Py_stat_struct *status) return 0; } - if (!GetFileInformationByHandle(h, &info)) { + if (!GetFileInformationByHandle(h, &info) || + !GetFileInformationByHandleEx(h, FileBasicInfo, &basicInfo, sizeof(basicInfo)) || + !GetFileInformationByHandleEx(h, FileIdInfo, &idInfo, sizeof(idInfo))) { /* The Win32 error is already set, but we also set errno for callers who expect it */ errno = winerror_to_errno(GetLastError()); return -1; } - _Py_attribute_data_to_stat(&info, 0, status); + _Py_attribute_data_to_stat(&info, 0, &basicInfo, &idInfo, status); return 0; #else return fstat(fd, status);