From 65a15dce44eb75af57b87211712162072d2189ea Mon Sep 17 00:00:00 2001
From: Steve Dower <steve.dower@python.org>
Date: Fri, 10 Jun 2022 11:14:25 +0100
Subject: [PATCH 1/2] bpo-42658: Use LCMapStringEx in ntpath.normcase to match
 OS behaviour for case-folding (GH-93591)

* bpo-42658: Use LCMapStringEx in ntpath.normcase to match OS behaviour for case-folding (GH-32010)

* Use AsWideCharString to avoid memory leaks in deprecated unicode converter

Co-authored-by: AN Long <aisk@users.noreply.github.com>
---
 Lib/ntpath.py                                 | 42 ++++++++--
 Lib/test/test_ntpath.py                       |  2 +
 .../2022-03-20-15-47-35.bpo-42658.16eXtb.rst  |  3 +
 Modules/_winapi.c                             | 79 +++++++++++++++++++
 Modules/clinic/_winapi.c.h                    | 34 +++++++-
 5 files changed, 151 insertions(+), 9 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst

diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index 527c7ae1938fbb..97edfa52aaafb8 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -23,6 +23,7 @@
 import genericpath
 from genericpath import *
 
+
 __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
            "basename","dirname","commonprefix","getsize","getmtime",
            "getatime","getctime", "islink","exists","lexists","isdir","isfile",
@@ -41,14 +42,39 @@ def _get_bothseps(path):
 # Other normalizations (such as optimizing '../' away) are not done
 # (this is done by normpath).
 
-def normcase(s):
-    """Normalize case of pathname.
-
-    Makes all characters lowercase and all slashes into backslashes."""
-    s = os.fspath(s)
-    if isinstance(s, bytes):
-        return s.replace(b'/', b'\\').lower()
-    else:
+try:
+    from _winapi import (
+        LCMapStringEx as _LCMapStringEx,
+        LOCALE_NAME_INVARIANT as _LOCALE_NAME_INVARIANT,
+        LCMAP_LOWERCASE as _LCMAP_LOWERCASE)
+
+    def normcase(s):
+        """Normalize case of pathname.
+
+        Makes all characters lowercase and all slashes into backslashes.
+        """
+        s = os.fspath(s)
+        if not s:
+            return s
+        if isinstance(s, bytes):
+            encoding = sys.getfilesystemencoding()
+            s = s.decode(encoding, 'surrogateescape').replace('/', '\\')
+            s = _LCMapStringEx(_LOCALE_NAME_INVARIANT,
+                               _LCMAP_LOWERCASE, s)
+            return s.encode(encoding, 'surrogateescape')
+        else:
+            return _LCMapStringEx(_LOCALE_NAME_INVARIANT,
+                                  _LCMAP_LOWERCASE,
+                                  s.replace('/', '\\'))
+except ImportError:
+    def normcase(s):
+        """Normalize case of pathname.
+
+        Makes all characters lowercase and all slashes into backslashes.
+        """
+        s = os.fspath(s)
+        if isinstance(s, bytes):
+            return os.fsencode(os.fsdecode(s).replace('/', '\\').lower())
         return s.replace('/', '\\').lower()
 
 
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index 661c59d6171635..8258b7e44dc8bf 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -807,6 +807,8 @@ def _check_function(self, func):
 
     def test_path_normcase(self):
         self._check_function(self.path.normcase)
+        if sys.platform == 'win32':
+            self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ')
 
     def test_path_isabs(self):
         self._check_function(self.path.isabs)
diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst
new file mode 100644
index 00000000000000..852cc77676a31d
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst
@@ -0,0 +1,3 @@
+Support native Windows case-insensitive path comparisons by using
+``LCMapStringEx`` instead of :func:`str.lower` in :func:`ntpath.normcase`.
+Add ``LCMapStringEx`` to the :mod:`_winapi` module.
diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index 3e24d512cac384..a3c30f23955450 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -1512,6 +1512,68 @@ _winapi_PeekNamedPipe_impl(PyObject *module, HANDLE handle, int size)
     }
 }
 
+/*[clinic input]
+_winapi.LCMapStringEx
+
+    locale: unicode
+    flags: DWORD
+    src: unicode
+
+[clinic start generated code]*/
+
+static PyObject *
+_winapi_LCMapStringEx_impl(PyObject *module, PyObject *locale, DWORD flags,
+                           PyObject *src)
+/*[clinic end generated code: output=8ea4c9d85a4a1f23 input=2fa6ebc92591731b]*/
+{
+    if (flags & (LCMAP_SORTHANDLE | LCMAP_HASH | LCMAP_BYTEREV |
+                 LCMAP_SORTKEY)) {
+        return PyErr_Format(PyExc_ValueError, "unsupported flags");
+    }
+
+    wchar_t *locale_ = PyUnicode_AsWideCharString(locale, NULL);
+    if (!locale_) {
+        return NULL;
+    }
+    wchar_t *src_ = PyUnicode_AsWideCharString(src, NULL);
+    if (!src_) {
+        PyMem_Free(locale_);
+        return NULL;
+    }
+
+    int dest_size = LCMapStringEx(locale_, flags, src_, -1, NULL, 0,
+                                  NULL, NULL, 0);
+    if (dest_size == 0) {
+        PyMem_Free(locale_);
+        PyMem_Free(src_);
+        return PyErr_SetFromWindowsErr(0);
+    }
+
+    wchar_t* dest = PyMem_NEW(wchar_t, dest_size);
+    if (dest == NULL) {
+        PyMem_Free(locale_);
+        PyMem_Free(src_);
+        return PyErr_NoMemory();
+    }
+
+    int nmapped = LCMapStringEx(locale_, flags, src_, -1, dest, dest_size,
+                                NULL, NULL, 0);
+    if (nmapped == 0) {
+        DWORD error = GetLastError();
+        PyMem_Free(locale_);
+        PyMem_Free(src_);
+        PyMem_DEL(dest);
+        return PyErr_SetFromWindowsErr(error);
+    }
+
+    PyObject *ret = PyUnicode_FromWideChar(dest, dest_size - 1);
+    PyMem_Free(locale_);
+    PyMem_Free(src_);
+    PyMem_DEL(dest);
+
+    return ret;
+}
+
 /*[clinic input]
 _winapi.ReadFile
 
@@ -2023,6 +2085,7 @@ static PyMethodDef winapi_functions[] = {
     _WINAPI_OPENFILEMAPPING_METHODDEF
     _WINAPI_OPENPROCESS_METHODDEF
     _WINAPI_PEEKNAMEDPIPE_METHODDEF
+    _WINAPI_LCMAPSTRINGEX_METHODDEF
     _WINAPI_READFILE_METHODDEF
     _WINAPI_SETNAMEDPIPEHANDLESTATE_METHODDEF
     _WINAPI_TERMINATEPROCESS_METHODDEF
@@ -2160,6 +2223,22 @@ static int winapi_exec(PyObject *m)
     WINAPI_CONSTANT(F_DWORD, FILE_TYPE_PIPE);
     WINAPI_CONSTANT(F_DWORD, FILE_TYPE_REMOTE);
 
+    WINAPI_CONSTANT("u", LOCALE_NAME_INVARIANT);
+    WINAPI_CONSTANT(F_DWORD, LOCALE_NAME_MAX_LENGTH);
+    WINAPI_CONSTANT("u", LOCALE_NAME_SYSTEM_DEFAULT);
+    WINAPI_CONSTANT("u", LOCALE_NAME_USER_DEFAULT);
+
+    WINAPI_CONSTANT(F_DWORD, LCMAP_FULLWIDTH);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_HALFWIDTH);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_HIRAGANA);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_KATAKANA);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_LINGUISTIC_CASING);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_LOWERCASE);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_SIMPLIFIED_CHINESE);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_TITLECASE);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE);
+
     WINAPI_CONSTANT("i", NULL);
 
     return 0;
diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h
index 5bda156d7aa80c..ac73789d799c87 100644
--- a/Modules/clinic/_winapi.c.h
+++ b/Modules/clinic/_winapi.c.h
@@ -840,6 +840,38 @@ _winapi_PeekNamedPipe(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
     return return_value;
 }
 
+PyDoc_STRVAR(_winapi_LCMapStringEx__doc__,
+"LCMapStringEx($module, /, locale, flags, src)\n"
+"--\n"
+"\n");
+
+#define _WINAPI_LCMAPSTRINGEX_METHODDEF    \
+    {"LCMapStringEx", (PyCFunction)(void(*)(void))_winapi_LCMapStringEx, METH_FASTCALL|METH_KEYWORDS, _winapi_LCMapStringEx__doc__},
+
+static PyObject *
+_winapi_LCMapStringEx_impl(PyObject *module, PyObject *locale, DWORD flags,
+                           PyObject *src);
+
+static PyObject *
+_winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    static const char * const _keywords[] = {"locale", "flags", "src", NULL};
+    static _PyArg_Parser _parser = {"UkU:LCMapStringEx", _keywords, 0};
+    PyObject *locale;
+    DWORD flags;
+    PyObject *src;
+
+    if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
+        &locale, &flags, &src)) {
+        goto exit;
+    }
+    return_value = _winapi_LCMapStringEx_impl(module, locale, flags, src);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_winapi_ReadFile__doc__,
 "ReadFile($module, /, handle, size, overlapped=False)\n"
 "--\n"
@@ -1184,4 +1216,4 @@ _winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=ac3623be6e42017c input=a9049054013a1b77]*/
+/*[clinic end generated code: output=8e13179bf25bdea5 input=a9049054013a1b77]*/

From 67d038a72aa0aa5cc1919525c7810cd334847a87 Mon Sep 17 00:00:00 2001
From: Steve Dower <steve.dower@python.org>
Date: Fri, 10 Jun 2022 16:42:32 +0100
Subject: [PATCH 2/2] Allow _winapi.LCMapStringEx to handle embedded nulls

---
 Modules/_winapi.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index a3c30f23955450..9b30a900326192 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -1535,13 +1535,19 @@ _winapi_LCMapStringEx_impl(PyObject *module, PyObject *locale, DWORD flags,
     if (!locale_) {
         return NULL;
     }
-    wchar_t *src_ = PyUnicode_AsWideCharString(src, NULL);
+    Py_ssize_t srcLenAsSsize;
+    int srcLen;
+    wchar_t *src_ = PyUnicode_AsWideCharString(src, &srcLenAsSsize);
     if (!src_) {
         PyMem_Free(locale_);
         return NULL;
     }
+    srcLen = (int)srcLenAsSsize;
+    if (srcLen != srcLenAsSsize) {
+        srcLen = -1;
+    }
 
-    int dest_size = LCMapStringEx(locale_, flags, src_, -1, NULL, 0,
+    int dest_size = LCMapStringEx(locale_, flags, src_, srcLen, NULL, 0,
                                   NULL, NULL, 0);
     if (dest_size == 0) {
         PyMem_Free(locale_);
@@ -1556,7 +1562,7 @@ _winapi_LCMapStringEx_impl(PyObject *module, PyObject *locale, DWORD flags,
         return PyErr_NoMemory();
     }
 
-    int nmapped = LCMapStringEx(locale_, flags, src_, -1, dest, dest_size,
+    int nmapped = LCMapStringEx(locale_, flags, src_, srcLen, dest, dest_size,
                                 NULL, NULL, 0);
     if (nmapped == 0) {
         DWORD error = GetLastError();
@@ -1566,7 +1572,7 @@ _winapi_LCMapStringEx_impl(PyObject *module, PyObject *locale, DWORD flags,
         return PyErr_SetFromWindowsErr(error);
     }
 
-    PyObject *ret = PyUnicode_FromWideChar(dest, dest_size - 1);
+    PyObject *ret = PyUnicode_FromWideChar(dest, dest_size);
     PyMem_Free(locale_);
     PyMem_Free(src_);
     PyMem_DEL(dest);