From f4a8de5b414172e620cbc8bf303f8fc096f4d553 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 28 Jan 2025 21:04:11 -0800 Subject: [PATCH 1/4] gh-129005: Align FileIO.readall allocation Both now use a pre-allocated buffer of length `bufsize`, fill it using a readinto, and have matching "expand buffer" logic. On my machine this takes: `./python -m test -M8g -uall test_largefile -m test_large_read -v` from ~3.7 seconds to ~3.3 seconds --- Lib/_pyio.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 023478aa78c6a0..79c4475d0cd081 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1674,22 +1674,32 @@ def readall(self): except OSError: pass - result = bytearray() + result = bytearray(bufsize) + bytes_read = 0 while True: - if len(result) >= bufsize: - bufsize = len(result) - bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) + if bytes_read >= bufsize: + # Parallels _io/fileio.c new_buffersize + if bufsize > 65536: + addend = bufsize >> 3 + else: + addend = bufsize + 256 + if addend < DEFAULT_BUFFER_SIZE: + addend = DEFAULT_BUFFER_SIZE + bufsize += addend + result[bytes_read:bufsize] = b'\0' + assert bufsize - bytes_read > 0, "Should always try and read at least one byte" n = bufsize - len(result) try: - chunk = os.read(self._fd, n) + n = os.readinto(self._fd, memoryview(result)[bytes_read:]) except BlockingIOError: - if result: + if bytes_read > 0: break return None - if not chunk: # reached the end of the file + if n == 0: # reached the end of the file break - result += chunk + bytes_read += n + del result[bytes_read:] return bytes(result) def readinto(self, buffer): From 871979d62104f7cd8449ccf3f5e39d05978ceab6 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 28 Jan 2025 21:22:49 -0800 Subject: [PATCH 2/4] add blurb --- .../next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 
Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst diff --git a/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst new file mode 100644 index 00000000000000..436e59065f2706 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst @@ -0,0 +1,2 @@ +``_pyio.FileIO.readall`` now allocates and resizes its buffer the same that +``_io.FileIO.readall`` does. From 5cee34efb0f0cfbd511d3696f8cca5d77ca5472a Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 28 Jan 2025 22:35:56 -0800 Subject: [PATCH 3/4] Update 2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst --- .../Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst index 436e59065f2706..91ecb1d237278b 100644 --- a/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst +++ b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst @@ -1,2 +1 @@ -``_pyio.FileIO.readall`` now allocates and resizes its buffer the same that -``_io.FileIO.readall`` does. +``_pyio.FileIO.readall`` now allocates, resizes, and fills a data buffer using the same algorithm ``_io.FileIO.readall`` uses. 
From 869a31b5a071c8ffbf381abeddca822e9f5514ae Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 29 Jan 2025 13:59:25 -0800 Subject: [PATCH 4/4] Remove unnecessary variable --- Lib/_pyio.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 79c4475d0cd081..76a27910da4d5f 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1688,7 +1688,6 @@ def readall(self): bufsize += addend result[bytes_read:bufsize] = b'\0' assert bufsize - bytes_read > 0, "Should always try and read at least one byte" - n = bufsize - len(result) try: n = os.readinto(self._fd, memoryview(result)[bytes_read:]) except BlockingIOError: