From d3dede69627d5b9e4e378dbe3953c14b0bed2227 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Tue, 12 Mar 2019 14:49:17 +0300
Subject: [PATCH 1/5] Fix gh-15086 properly instead of making a workaround

---
 pandas/_libs/parsers.pyx     |  6 +-----
 pandas/_libs/src/parser/io.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 36c4c752206a8..18959b2d37b7f 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -678,11 +678,7 @@ cdef class TextReader:
 
         if isinstance(source, basestring):
             if not isinstance(source, bytes):
-                if compat.PY36 and compat.is_platform_windows():
-                    # see gh-15086.
-                    encoding = "mbcs"
-                else:
-                    encoding = sys.getfilesystemencoding() or "utf-8"
+                encoding = sys.getfilesystemencoding() or "utf-8"
 
                 source = source.encode(encoding)
 
diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c
index f578ce138e274..acb64ef0af7bd 100644
--- a/pandas/_libs/src/parser/io.c
+++ b/pandas/_libs/src/parser/io.c
@@ -17,6 +17,11 @@ The full license is in the LICENSE file, distributed with this software.
 #define O_BINARY 0
 #endif  // O_BINARY
 
+#if PY_VERSION_HEX >= 0x03060000 && defined(_WIN32)
+#define USE_WIN_UTF16
+#include <Windows.h>
+#endif
+
 /*
   On-disk FILE, uncompressed
 */
@@ -27,7 +32,34 @@ void *new_file_source(char *fname, size_t buffer_size) {
         return NULL;
     }
 
+#ifdef USE_WIN_UTF16
+    // Fix gh-15086 properly - convert UTF8 to UTF16 that Windows widechar API
+    // accepts. This is needed because UTF8 might _not_ be convertible to MBCS
+    // for some conditions, as MBCS is locale-dependent, and not all unicode
+    // symbols can be expressed in it.
+    {
+        wchar_t* wname = NULL;
+        int required = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
+        if (required == 0) {
+            free(fs);
+            return NULL;
+        }
+        wname = (wchar_t*)malloc(required * sizeof(wchar_t));
+        if (wname == NULL) {
+            free(fs);
+            return NULL;
+        }
+        if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) < required) {
+            free(wname);
+            free(fs);
+            return NULL;
+        }
+        fs->fd = _wopen(wname, O_RDONLY | O_BINARY);
+        free(wname);
+    }
+#else
     fs->fd = open(fname, O_RDONLY | O_BINARY);
+#endif
     if (fs->fd == -1) {
         free(fs);
         return NULL;

From 810416e04bd99e1ed0d26b2f034f0c390b4e2bab Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 15 Mar 2019 21:57:32 +0300
Subject: [PATCH 2/5] fix code style

---
 pandas/_libs/src/parser/io.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c
index acb64ef0af7bd..5d73230f32955 100644
--- a/pandas/_libs/src/parser/io.c
+++ b/pandas/_libs/src/parser/io.c
@@ -49,7 +49,8 @@ void *new_file_source(char *fname, size_t buffer_size) {
             free(fs);
             return NULL;
         }
-        if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) < required) {
+        if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) <
+                                                                required) {
             free(wname);
             free(fs);
             return NULL;

From 8fd2214524bbff1d8e0de3cf9b501a5708b9c7ca Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Tue, 19 Mar 2019 17:56:24 +0300
Subject: [PATCH 3/5] Make sure test_filename_with_special_chars properly tests
 combinations of chars; update whatsnew

---
 doc/source/whatsnew/v0.25.0.rst       |  1 +
 pandas/tests/io/parser/test_common.py | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 73eb6a15a1b47..25f2033181c8b 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -175,6 +175,7 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
+- Bug in `TextReader()` not properly working with UTF8 on Windows on Python 3.6+ (fix for :issue:`15086` instead of a workaround)
 
 Categorical
 ^^^^^^^^^^^
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index 05da171d7dc31..9e776c2c15c0b 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -5,6 +5,7 @@
 specific classification into the other test modules.
 """
 
+import sys
 import codecs
 from collections import OrderedDict
 import csv
@@ -1904,12 +1905,21 @@ def test_suppress_error_output(all_parsers, capsys):
     assert captured.err == ""
 
 
+def __should_skip_utf8_test():
+    if compat.is_platform_windows():
+        import ctypes
+        ansi_codepage = ctypes.cdll.kernel32.GetACP()
+        return ansi_codepage != 1252 and sys.version_info < (3, 6)
+    return False
+
+@pytest.mark.skipif(__should_skip_utf8_test(),
+                    reason="Python < 3.6 won't pass on non-1252 codepage")
 def test_filename_with_special_chars(all_parsers):
     # see gh-15086.
     parser = all_parsers
     df = DataFrame({"a": [1, 2, 3]})
 
-    with tm.ensure_clean("sé-es-vé.csv") as path:
+    with tm.ensure_clean("sé-es-vé-sй.csv") as path:
         df.to_csv(path, index=False)
 
         result = parser.read_csv(path)

From 7961889162368a79dfd94d33f15aac58e2ad7b03 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Tue, 19 Mar 2019 18:16:33 +0300
Subject: [PATCH 4/5] Address comments by @jreback

---
 doc/source/whatsnew/v0.25.0.rst       |  2 +-
 pandas/tests/io/parser/test_common.py | 11 +++++------
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 25f2033181c8b..807cad8c8eec8 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -175,7 +175,6 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
-- Bug in `TextReader()` not properly working with UTF8 on Windows on Python 3.6+ (fix for :issue:`15086` instead of a workaround)
 
 Categorical
 ^^^^^^^^^^^
@@ -272,6 +271,7 @@ I/O
 - Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string "nan" instead of ``numpy.nan`` (:issue:`25468`)
 - :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AsseertionError`` (:issue:`25608`)
 - Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` that would lead to incorrect output when the ``header`` keyword is used (:issue:`16718`)
+- Bug in :func:`read_csv` not properly interpreting the UTF8 encoded filenames on Windows on Python 3.6+ (:issue:`15086`)
 -
 
 
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index 9e776c2c15c0b..4ca8dd34cd175 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -5,8 +5,8 @@
 specific classification into the other test modules.
 """
 
-import sys
 import codecs
+import ctypes
 from collections import OrderedDict
 import csv
 from datetime import datetime
@@ -1905,15 +1905,14 @@ def test_suppress_error_output(all_parsers, capsys):
     assert captured.err == ""
 
 
-def __should_skip_utf8_test():
+def __windows_ansi_encoding_not_cp1252():
     if compat.is_platform_windows():
-        import ctypes
         ansi_codepage = ctypes.cdll.kernel32.GetACP()
-        return ansi_codepage != 1252 and sys.version_info < (3, 6)
+        return ansi_codepage != 1252 and not compat.PY36
     return False
 
-@pytest.mark.skipif(__should_skip_utf8_test(),
-                    reason="Python < 3.6 won't pass on non-1252 codepage")
+@pytest.mark.skipif(__windows_ansi_encoding_not_cp1252(),
+                    reason="On Python < 3.6 won't pass on non-1252 codepage")
 def test_filename_with_special_chars(all_parsers):
     # see gh-15086.
     parser = all_parsers

From 6095ea3c81786f856b48133db68fa09beb1d1561 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Wed, 20 Mar 2019 13:00:47 +0300
Subject: [PATCH 5/5] Parametrize test_filename_with_special_chars

Use CP-1252 and CP-1251 filenames as separate test cases;
skip the test on Windows on Python < 3.6, as it won't pass there
---
 pandas/tests/io/parser/test_common.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index 4ca8dd34cd175..9060543f1a373 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -6,7 +6,6 @@
 """
 
 import codecs
-import ctypes
 from collections import OrderedDict
 import csv
 from datetime import datetime
@@ -1905,20 +1904,15 @@ def test_suppress_error_output(all_parsers, capsys):
     assert captured.err == ""
 
 
-def __windows_ansi_encoding_not_cp1252():
-    if compat.is_platform_windows():
-        ansi_codepage = ctypes.cdll.kernel32.GetACP()
-        return ansi_codepage != 1252 and not compat.PY36
-    return False
-
-@pytest.mark.skipif(__windows_ansi_encoding_not_cp1252(),
-                    reason="On Python < 3.6 won't pass on non-1252 codepage")
-def test_filename_with_special_chars(all_parsers):
+@pytest.mark.skipif(compat.is_platform_windows() and not compat.PY36,
+                    reason="On Python < 3.6 won't pass on Windows")
+@pytest.mark.parametrize("filename", ["sé-es-vé.csv", "ru-sй.csv"])
+def test_filename_with_special_chars(all_parsers, filename):
     # see gh-15086.
     parser = all_parsers
     df = DataFrame({"a": [1, 2, 3]})
 
-    with tm.ensure_clean("sé-es-vé-sй.csv") as path:
+    with tm.ensure_clean(filename) as path:
         df.to_csv(path, index=False)
 
         result = parser.read_csv(path)