diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 6736aa9ee2b0ef..d1e7dfdc471b6c 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -728,6 +728,15 @@ than raw I/O does. Return :class:`bytes` containing the entire contents of the buffer. + .. method:: peek(size=1, /) + + Return bytes from the current position onwards without advancing the position. + At least one byte of data is returned if *size* is not zero and the position is not at EOF. + Return an empty :class:`bytes` object at EOF. + If the *size* argument is negative or larger than the number of available bytes, + a copy of the buffer from the current position until the end is returned. + + .. versionadded:: 3.13 .. method:: read1(size=-1, /) @@ -761,9 +770,12 @@ than raw I/O does. .. method:: peek(size=0, /) - Return bytes from the stream without advancing the position. At most one - single read on the raw stream is done to satisfy the call. The number of - bytes returned may be less or more than requested. + Return bytes from the current position onwards without advancing the position. + At least one byte of data is returned if not at EOF. + Return an empty :class:`bytes` object at EOF. + At most one single read on the underlying raw stream is done to satisfy the call. + The *size* argument is ignored. + The number of bytes returned depends on the buffer size and the current position in the internal buffer. .. method:: read(size=-1, /) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 1053aa5729ede4..44694c89ce6f12 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -180,6 +180,8 @@ and only logged in :ref:`Python Development Mode <devmode>` or on :ref:`Python built on debug mode <debug-build>`. (Contributed by Victor Stinner in :gh:`62948`.) +* Add :meth:`io.BytesIO.peek`. (Contributed by Marcel Martin in :gh:`90533`.) 
+ opcode ------ diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 32698abac78d25..801f0447447a21 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -978,6 +978,14 @@ def tell(self): raise ValueError("tell on closed file") return self._pos + def peek(self, size=1): + """Return bytes from the current position without advancing it.""" + if self.closed: + raise ValueError("peek on closed file") + if size < 0: + return bytes(self._buffer[self._pos:]) + return bytes(self._buffer[self._pos:self._pos + size]) + def truncate(self, pos=None): if self.closed: raise ValueError("truncate on closed file") diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 731299294e6877..ffedfe71ee4da1 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -531,6 +531,48 @@ def test_relative_seek(self): memio.seek(1, 1) self.assertEqual(memio.read(), buf[1:]) + def test_peek(self): + buf = self.buftype("1234567890") + with self.ioclass(buf) as memio: + self.assertEqual(memio.tell(), 0) + self.assertEqual(memio.peek(1), buf[:1]) + self.assertEqual(memio.peek(1), buf[:1]) + self.assertEqual(memio.peek(3), buf[:3]) + self.assertEqual(memio.peek(5), buf[:5]) + self.assertEqual(memio.peek(), buf[:1]) + self.assertEqual(memio.peek(0), b"") + self.assertEqual(memio.peek(len(buf) + 100), buf) + self.assertEqual(memio.peek(-1), buf) + self.assertEqual(memio.tell(), 0) + memio.read(1) + self.assertEqual(memio.tell(), 1) + self.assertEqual(memio.peek(1), buf[1:2]) + self.assertEqual(memio.peek(1), buf[1:2]) + self.assertEqual(memio.peek(3), buf[1:4]) + self.assertEqual(memio.peek(5), buf[1:6]) + self.assertEqual(memio.peek(), buf[1:2]) + self.assertEqual(memio.peek(0), b"") + self.assertEqual(memio.peek(len(buf) + 100), buf[1:]) + self.assertEqual(memio.peek(-1), buf[1:]) + self.assertEqual(memio.tell(), 1) + memio.read() + self.assertEqual(memio.tell(), len(buf)) + self.assertEqual(memio.peek(1), self.EOF) + self.assertEqual(memio.peek(3), self.EOF) + self.assertEqual(memio.peek(5), self.EOF) + self.assertEqual(memio.peek(0), b"") + 
self.assertEqual(memio.tell(), len(buf)) + # Peeking works after writing + abc = self.buftype("abc") + memio.write(abc) + self.assertEqual(memio.peek(), self.EOF) + memio.seek(len(buf)) + self.assertEqual(memio.peek(), abc[:1]) + self.assertEqual(memio.peek(-1), abc) + self.assertEqual(memio.peek(len(abc) + 100), abc) + self.assertEqual(memio.tell(), len(buf)) + self.assertRaises(ValueError, memio.peek) + def test_unicode(self): memio = self.ioclass() diff --git a/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst b/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst new file mode 100644 index 00000000000000..c11f101b749c17 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst @@ -0,0 +1 @@ +Add :meth:`io.BytesIO.peek`. diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 16b8ac600ace79..428b6aba16dd4b 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -395,8 +395,9 @@ _io_BytesIO_tell_impl(bytesio *self) return PyLong_FromSsize_t(self->pos); } +// Read without advancing position static PyObject * -read_bytes(bytesio *self, Py_ssize_t size) +peek_bytes(bytesio *self, Py_ssize_t size) { const char *output; @@ -405,15 +406,23 @@ read_bytes(bytesio *self, Py_ssize_t size) if (size > 1 && self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) && self->exports == 0) { - self->pos += size; return Py_NewRef(self->buf); } output = PyBytes_AS_STRING(self->buf) + self->pos; - self->pos += size; return PyBytes_FromStringAndSize(output, size); } +static PyObject * +read_bytes(bytesio *self, Py_ssize_t size) +{ + PyObject *bytes = peek_bytes(self, size); + if (bytes != NULL) { + self->pos += size; + } + return bytes; +} + /*[clinic input] _io.BytesIO.read size: Py_ssize_t(accept={int, NoneType}) = -1 @@ -463,6 +472,38 @@ _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size) return _io_BytesIO_read_impl(self, size); } + +/*[clinic input] +_io.BytesIO.peek + size: Py_ssize_t = 1 + / + +Return 
bytes from the stream without advancing the position. + +If the size argument is negative, read until EOF is reached. +Return an empty bytes object at EOF. +[clinic start generated code]*/ + +static PyObject * +_io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) +/*[clinic end generated code: output=fa4d8ce28b35db9b input=1510f0fcf77c0048]*/ +{ + CHECK_CLOSED(self); + + /* adjust invalid sizes */ + Py_ssize_t n = self->string_size - self->pos; + if (size < 0 || size > n) { + size = n; + /* n can be negative after truncate() or seek() */ + if (size < 0) { + size = 0; + } + } + return peek_bytes(self, size); +} + + + /*[clinic input] _io.BytesIO.readline size: Py_ssize_t(accept={int, NoneType}) = -1 @@ -1020,6 +1061,7 @@ static struct PyMethodDef bytesio_methods[] = { _IO_BYTESIO_READLINE_METHODDEF _IO_BYTESIO_READLINES_METHODDEF _IO_BYTESIO_READ_METHODDEF + _IO_BYTESIO_PEEK_METHODDEF _IO_BYTESIO_GETBUFFER_METHODDEF _IO_BYTESIO_GETVALUE_METHODDEF _IO_BYTESIO_SEEK_METHODDEF diff --git a/Modules/_io/clinic/bytesio.c.h b/Modules/_io/clinic/bytesio.c.h index 37023e49087647..bde5baec1171f9 100644 --- a/Modules/_io/clinic/bytesio.c.h +++ b/Modules/_io/clinic/bytesio.c.h @@ -233,6 +233,52 @@ _io_BytesIO_read1(bytesio *self, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(_io_BytesIO_peek__doc__, +"peek($self, size=1, /)\n" +"--\n" +"\n" +"Return bytes from the stream without advancing the position.\n" +"\n" +"If the size argument is negative, read until EOF is reached.\n" +"Return an empty bytes object at EOF."); + +#define _IO_BYTESIO_PEEK_METHODDEF \ + {"peek", _PyCFunction_CAST(_io_BytesIO_peek), METH_FASTCALL, _io_BytesIO_peek__doc__}, + +static PyObject * +_io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size); + +static PyObject * +_io_BytesIO_peek(bytesio *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t size = 1; + + if (!_PyArg_CheckPositional("peek", nargs, 0, 1)) { + goto exit; + } + if 
(nargs < 1) { + goto skip_optional; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + size = ival; + } +skip_optional: + return_value = _io_BytesIO_peek_impl(self, size); + +exit: + return return_value; +} + PyDoc_STRVAR(_io_BytesIO_readline__doc__, "readline($self, size=-1, /)\n" "--\n" @@ -534,4 +580,4 @@ _io_BytesIO___init__(PyObject *self, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=2be0e05a8871b7e2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d678d2262d107c8f input=a9049054013a1b77]*/