From 96078b08952d0fd992dfd6c79b8dbd044665c469 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 28 Jan 2025 21:04:11 -0800 Subject: [PATCH 1/2] gh-129005: Align FileIO.readall allocation `_pyio.FileIO.readall()` and `_io.FileIO.readall()` now both use a pre-allocated buffer of length `bufsize`, fill it using `readinto`, and have matching "expand buffer" logic. On my machine this takes `./python -m test -M8g -uall test_largefile -m test_large_read -v` from ~3.7 seconds to ~3.4 seconds. --- Lib/_pyio.py | 27 ++++++++++++------- ...-01-28-21-22-44.gh-issue-129005.h57i9j.rst | 1 + 2 files changed, 19 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 023478aa78c6a0..76a27910da4d5f 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1674,22 +1674,31 @@ def readall(self): except OSError: pass - result = bytearray() + result = bytearray(bufsize) + bytes_read = 0 while True: - if len(result) >= bufsize: - bufsize = len(result) - bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) - n = bufsize - len(result) + if bytes_read >= bufsize: + # Parallels _io/fileio.c new_buffersize + if bufsize > 65536: + addend = bufsize >> 3 + else: + addend = bufsize + 256 + if addend < DEFAULT_BUFFER_SIZE: + addend = DEFAULT_BUFFER_SIZE + bufsize += addend + result[bytes_read:bufsize] = b'\0' + assert bufsize - bytes_read > 0, "Should always try and read at least one byte" try: - chunk = os.read(self._fd, n) + n = os.readinto(self._fd, memoryview(result)[bytes_read:]) except BlockingIOError: - if result: + if bytes_read > 0: break return None - if not chunk: # reached the end of the file + if n == 0: # reached the end of the file break - result += chunk + bytes_read += n + del result[bytes_read:] return bytes(result) def readinto(self, buffer): diff --git a/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst new file mode 100644 index 
00000000000000..91ecb1d237278b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst @@ -0,0 +1 @@ +``_pyio.FileIO.readall`` now allocates, resizes, and fills a data buffer using the same algorithm ``_io.FileIO.readall`` uses. From 044ec508ec72f88ee5a9d050563d5084cc4be66c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 30 Jan 2025 11:49:13 +0100 Subject: [PATCH 2/2] Update Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst --- .../Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst index 91ecb1d237278b..c76fb05e196f87 100644 --- a/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst +++ b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst @@ -1 +1,2 @@ -``_pyio.FileIO.readall`` now allocates, resizes, and fills a data buffer using the same algorithm ``_io.FileIO.readall`` uses. +``_pyio.FileIO.readall()`` now allocates, resizes, and fills a data buffer using +the same algorithm ``_io.FileIO.readall()`` uses.