From 982757503002cad7035fb4ba1ec255ae1f4f3e99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Mon, 19 Sep 2022 21:43:08 -0400
Subject: [PATCH 1/5] REGR: TextIOWrapper raising an error in read_csv

---
 doc/source/whatsnew/v1.5.1.rst                     |  2 +-
 pandas/io/parsers/c_parser_wrapper.py              |  3 +++
 pandas/tests/io/parser/common/test_common_basic.py | 14 ++++++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst
index f8069b5476d9e..9d40d9118db32 100644
--- a/doc/source/whatsnew/v1.5.1.rst
+++ b/doc/source/whatsnew/v1.5.1.rst
@@ -14,7 +14,7 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
--
+- Regression in :func:`.read_csv` causing an ``EmptyDataError`` when using a UTF-8 file handle that was already read from (:issue:`48646`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
index 99051ec661413..69a315a2fa8fd 100644
--- a/pandas/io/parsers/c_parser_wrapper.py
+++ b/pandas/io/parsers/c_parser_wrapper.py
@@ -74,6 +74,9 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:
             and src.encoding == "utf-8"
             and (src.errors or "strict") == kwds["encoding_errors"]
         ):
+            # the internal buffer TextIOWrapper.buffer might have read ahead, make sure
+            # to first go back where TextIOWrapper is
+            src.seek(src.tell())
             # error: Incompatible types in assignment (expression has type "BinaryIO",
             # variable has type "ReadCsvBuffer[str]")
             src = src.buffer  # type: ignore[assignment]
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index a7cdc3c1a84d2..a7ef18ef228da 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -928,3 +928,17 @@ def test_read_table_posargs_deprecation(all_parsers):
         "except for the argument 'filepath_or_buffer' will be keyword-only"
     )
     parser.read_table_check_warnings(FutureWarning, msg, data, " ")
+
+
+def test_read_seek(all_parsers):
+    # GH48646
+    parser = all_parsers
+    prefix = "### DATA\n"
+    content = "nkey,value\ntables,rectangular\n"
+    with tm.ensure_clean() as path:
+        Path(path).write_text(prefix + content)
+        with open(path, mode="r") as file:
+            file.readline()
+            actual = parser.read_csv(file)
+        expected = parser.read_csv(StringIO(content))
+    tm.assert_frame_equal(actual, expected)

From cfe3446e93711d7f17cffbb8a05c539e835dc5fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Mon, 19 Sep 2022 22:08:49 -0400
Subject: [PATCH 2/5] pyupgrade

---
 pandas/tests/io/parser/common/test_common_basic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index a7ef18ef228da..359b059252556 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -937,7 +937,7 @@ def test_read_seek(all_parsers):
     content = "nkey,value\ntables,rectangular\n"
     with tm.ensure_clean() as path:
         Path(path).write_text(prefix + content)
-        with open(path, mode="r") as file:
+        with open(path, encoding="utf-8") as file:
             file.readline()
             actual = parser.read_csv(file)
         expected = parser.read_csv(StringIO(content))

From b081cc8eec3c50234942d44c61a0be01d9210140 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Tue, 20 Sep 2022 07:39:52 -0400
Subject: [PATCH 3/5] do not try to seek on unseekable buffers

---
 pandas/io/parsers/c_parser_wrapper.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
index 69a315a2fa8fd..9dddf863ec83e 100644
--- a/pandas/io/parsers/c_parser_wrapper.py
+++ b/pandas/io/parsers/c_parser_wrapper.py
@@ -73,10 +73,8 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:
             isinstance(src, TextIOWrapper)
             and src.encoding == "utf-8"
             and (src.errors or "strict") == kwds["encoding_errors"]
+            and (not src.seekable() or src.tell() == src.buffer.tell())
         ):
-            # the internal buffer TextIOWrapper.buffer might have read ahead, make sure
-            # to first go back where TextIOWrapper is
-            src.seek(src.tell())
             # error: Incompatible types in assignment (expression has type "BinaryIO",
             # variable has type "ReadCsvBuffer[str]")
             src = src.buffer  # type: ignore[assignment]

From 747300a69e21b0ea4019f1ba14c097bf8bf2ca09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Tue, 20 Sep 2022 07:43:37 -0400
Subject: [PATCH 4/5] unseekable buffer might also have read ahead

---
 pandas/io/parsers/c_parser_wrapper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
index 9dddf863ec83e..141ff353dfe75 100644
--- a/pandas/io/parsers/c_parser_wrapper.py
+++ b/pandas/io/parsers/c_parser_wrapper.py
@@ -73,7 +73,8 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:
             isinstance(src, TextIOWrapper)
             and src.encoding == "utf-8"
             and (src.errors or "strict") == kwds["encoding_errors"]
-            and (not src.seekable() or src.tell() == src.buffer.tell())
+            and src.seekable()
+            and src.tell() == src.buffer.tell()
         ):
             # error: Incompatible types in assignment (expression has type "BinaryIO",
             # variable has type "ReadCsvBuffer[str]")

From f5f7fab2d85d65f249693f9e1a085c415a98c67a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Tue, 20 Sep 2022 09:54:37 -0400
Subject: [PATCH 5/5] safer alternative: do not mess with internal/private(?)
 buffer of TextIOWrapper (effectively applies the shortcut only to files
 pandas opens)

---
 pandas/io/parsers/c_parser_wrapper.py | 14 --------------
 pandas/io/parsers/readers.py          | 11 +++++++++++
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
index 141ff353dfe75..6e4ea85548230 100644
--- a/pandas/io/parsers/c_parser_wrapper.py
+++ b/pandas/io/parsers/c_parser_wrapper.py
@@ -2,7 +2,6 @@
 
 from collections import defaultdict
 import inspect
-from io import TextIOWrapper
 from typing import (
     TYPE_CHECKING,
     Hashable,
@@ -67,19 +66,6 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:
         # Have to pass int, would break tests using TextReader directly otherwise :(
         kwds["on_bad_lines"] = self.on_bad_lines.value
 
-        # c-engine can cope with utf-8 bytes. Remove TextIOWrapper when its errors
-        # policy is the same as the one given to read_csv
-        if (
-            isinstance(src, TextIOWrapper)
-            and src.encoding == "utf-8"
-            and (src.errors or "strict") == kwds["encoding_errors"]
-            and src.seekable()
-            and src.tell() == src.buffer.tell()
-        ):
-            # error: Incompatible types in assignment (expression has type "BinaryIO",
-            # variable has type "ReadCsvBuffer[str]")
-            src = src.buffer  # type: ignore[assignment]
-
         for key in (
             "storage_options",
             "encoding",
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 20122d69748aa..eaec4c6bd5991 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -60,6 +60,7 @@
 from pandas.io.common import (
     IOHandles,
     get_handle,
+    stringify_path,
     validate_header_arg,
 )
 from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper
@@ -1727,6 +1728,16 @@ def _make_engine(
             if engine == "pyarrow":
                 is_text = False
                 mode = "rb"
+            elif (
+                engine == "c"
+                and self.options.get("encoding", "utf-8") == "utf-8"
+                and isinstance(stringify_path(f), str)
+            ):
+                # the c engine can decode utf-8 bytes itself; adding a TextIOWrapper
+                # makes the c engine far slower, especially with memory_map=True
+                is_text = False
+                if "b" not in mode:
+                    mode += "b"
             self.handles = get_handle(
                 f,
                 mode,