Skip to content

Commit f2d632e

Browse files
GH-31507: [Python] Address docstrings in Streams and File Access (Stream Classes) (#33698)
### Rationale for this change Ensure docstrings for [Streams and File Access](https://arrow.apache.org/docs/python/api/files.html) - Stream Classes - have an Examples section. ### What changes are included in this PR? Docstrings are added to listed Stream Classes: - OSFile - PythonFile - BufferReader - BufferOutputStream - FixedSizeBufferWriter - MemoryMappedFile - CompressedInputStream - CompressedOutputStream ### Are these changes tested? Yes, locally with `pytest --doctest-cython --disable-warnings pyarrow` and on the CI with `Python / AMD64 Conda Python 3.9 Sphinx & Numpydoc` build. ### Are there any user-facing changes? No. * Closes: #31507 Lead-authored-by: Alenka Frim <frim.alenka@gmail.com> Co-authored-by: Alenka Frim <AlenkaF@users.noreply.github.com> Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com> Signed-off-by: Alenka Frim <frim.alenka@gmail.com>
1 parent 7db7e6a commit f2d632e

File tree

1 file changed

+174
-0
lines changed

1 file changed

+174
-0
lines changed

python/pyarrow/io.pxi

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,34 @@ cdef class PythonFile(NativeFile):
782782
>>> import pyarrow as pa
783783
>>> pa.PythonFile(io.BytesIO())
784784
<pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False>
785+
786+
Create a stream for writing:
787+
788+
>>> buf = io.BytesIO()
789+
>>> f = pa.PythonFile(buf, mode = 'w')
790+
>>> f.writable()
791+
True
792+
>>> f.write(b'PythonFile')
793+
10
794+
>>> buf.getvalue()
795+
b'PythonFile'
796+
>>> f.close()
797+
>>> f
798+
<pyarrow.PythonFile closed=True own_file=False is_seekable=False is_writable=True is_readable=False>
799+
800+
Create a stream for reading:
801+
802+
>>> buf = io.BytesIO(b'PythonFile')
803+
>>> f = pa.PythonFile(buf, mode = 'r')
804+
>>> f.mode
805+
'rb'
806+
>>> f.read()
807+
b'PythonFile'
808+
>>> f
809+
<pyarrow.PythonFile closed=False own_file=False is_seekable=True is_writable=False is_readable=True>
810+
>>> f.close()
811+
>>> f
812+
<pyarrow.PythonFile closed=True own_file=False is_seekable=True is_writable=False is_readable=True>
785813
"""
786814
cdef:
787815
object handle
@@ -851,6 +879,23 @@ cdef class MemoryMappedFile(NativeFile):
851879
A stream that represents a memory-mapped file.
852880
853881
Supports 'r', 'r+', 'w' modes.
882+
883+
Examples
884+
--------
885+
Create a new file with memory map:
886+
887+
>>> import pyarrow as pa
888+
>>> mmap = pa.create_memory_map('example_mmap.dat', 10)
889+
>>> mmap
890+
<pyarrow.MemoryMappedFile closed=False own_file=False is_seekable=True is_writable=True is_readable=True>
891+
>>> mmap.close()
892+
893+
Open an existing file with memory map:
894+
895+
>>> with pa.memory_map('example_mmap.dat') as mmap:
896+
... mmap
897+
...
898+
<pyarrow.MemoryMappedFile closed=False own_file=False is_seekable=True is_writable=False is_readable=True>
854899
"""
855900
cdef:
856901
shared_ptr[CMemoryMappedFile] handle
@@ -1004,6 +1049,34 @@ def create_memory_map(path, size):
10041049
cdef class OSFile(NativeFile):
10051050
"""
10061051
A stream backed by a regular file descriptor.
1052+
1053+
Examples
1054+
--------
1055+
Create a new file to write to:
1056+
1057+
>>> import pyarrow as pa
1058+
>>> with pa.OSFile('example_osfile.arrow', mode='w') as f:
1059+
... f.writable()
1060+
... f.write(b'OSFile')
1061+
... f.seekable()
1062+
...
1063+
True
1064+
6
1065+
False
1066+
1067+
Open the file to read:
1068+
1069+
>>> with pa.OSFile('example_osfile.arrow', mode='r') as f:
1070+
... f.mode
1071+
... f.read()
1072+
...
1073+
'rb'
1074+
b'OSFile'
1075+
1076+
Inspect created OSFile:
1077+
1078+
>>> pa.OSFile('example_osfile.arrow')
1079+
<pyarrow.OSFile closed=False own_file=False is_seekable=True is_writable=False is_readable=True>
10071080
"""
10081081
cdef:
10091082
object path
@@ -1046,6 +1119,26 @@ cdef class OSFile(NativeFile):
10461119
cdef class FixedSizeBufferWriter(NativeFile):
10471120
"""
10481121
A stream writing to an Arrow buffer.
1122+
1123+
Examples
1124+
--------
1125+
Create a stream to write to ``pyarrow.Buffer``:
1126+
1127+
>>> import pyarrow as pa
1128+
>>> buf = pa.allocate_buffer(5)
1129+
>>> with pa.output_stream(buf) as stream:
1130+
... stream.write(b'abcde')
1131+
... stream
1132+
...
1133+
5
1134+
<pyarrow.FixedSizeBufferWriter closed=False own_file=False is_seekable=False is_writable=True is_readable=False>
1135+
1136+
Inspect the buffer:
1137+
1138+
>>> buf.to_pybytes()
1139+
b'abcde'
1140+
>>> buf
1141+
<pyarrow.Buffer address=... size=5 is_cpu=True is_mutable=True>
10491142
"""
10501143

10511144
def __cinit__(self, Buffer buffer):
@@ -1353,6 +1446,27 @@ def allocate_buffer(int64_t size, MemoryPool memory_pool=None,
13531446

13541447

13551448
cdef class BufferOutputStream(NativeFile):
1449+
"""
1450+
An output stream that writes to a resizable buffer.
1451+
1452+
The buffer is produced as a result when ``getvalue()`` is called.
1453+
1454+
Examples
1455+
--------
1456+
Create an output stream, write data to it and finalize it with
1457+
``getvalue()``:
1458+
1459+
>>> import pyarrow as pa
1460+
>>> f = pa.BufferOutputStream()
1461+
>>> f.write(b'pyarrow.Buffer')
1462+
14
1463+
>>> f.closed
1464+
False
1465+
>>> f.getvalue()
1466+
<pyarrow.Buffer address=... size=14 is_cpu=True is_mutable=True>
1467+
>>> f.closed
1468+
True
1469+
"""
13561470

13571471
cdef:
13581472
shared_ptr[CResizableBuffer] buffer
@@ -1394,6 +1508,24 @@ cdef class BufferReader(NativeFile):
13941508
Parameters
13951509
----------
13961510
obj : Python bytes or pyarrow.Buffer
1511+
1512+
Examples
1513+
--------
1514+
Create an Arrow input stream and inspect it:
1515+
1516+
>>> import pyarrow as pa
1517+
>>> data = b'reader data'
1518+
>>> buf = memoryview(data)
1519+
>>> with pa.input_stream(buf) as stream:
1520+
... stream.size()
1521+
... stream.read(6)
1522+
... stream.seek(7)
1523+
... stream.read(15)
1524+
...
1525+
11
1526+
b'reader'
1527+
7
1528+
b'data'
13971529
"""
13981530
cdef:
13991531
Buffer buffer
@@ -1419,6 +1551,36 @@ cdef class CompressedInputStream(NativeFile):
14191551
Input stream object to wrap with the compression.
14201552
compression : str
14211553
The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd").
1554+
1555+
Examples
1556+
--------
1557+
Create an output stream which compresses the data:
1558+
1559+
>>> import pyarrow as pa
1560+
>>> data = b"Compressed stream"
1561+
>>> raw = pa.BufferOutputStream()
1562+
>>> with pa.CompressedOutputStream(raw, "gzip") as compressed:
1563+
... compressed.write(data)
1564+
...
1565+
17
1566+
1567+
Create an input stream with decompression referencing the
1568+
buffer with compressed data:
1569+
1570+
>>> cdata = raw.getvalue()
1571+
>>> with pa.input_stream(cdata, compression="gzip") as compressed:
1572+
... compressed.read()
1573+
...
1574+
b'Compressed stream'
1575+
1576+
which actually translates to the use of ``BufferReader`` and
1577+
``CompressedInputStream``:
1578+
1579+
>>> raw = pa.BufferReader(cdata)
1580+
>>> with pa.CompressedInputStream(raw, "gzip") as compressed:
1581+
... compressed.read()
1582+
...
1583+
b'Compressed stream'
14221584
"""
14231585

14241586
def __init__(self, object stream, str compression not None):
@@ -1446,6 +1608,18 @@ cdef class CompressedOutputStream(NativeFile):
14461608
Output stream object to wrap with the compression.
14471609
compression : str
14481610
The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd").
1611+
1612+
Examples
1613+
--------
1614+
Create an output stream which compresses the data:
1615+
1616+
>>> import pyarrow as pa
1617+
>>> data = b"Compressed stream"
1618+
>>> raw = pa.BufferOutputStream()
1619+
>>> with pa.CompressedOutputStream(raw, "gzip") as compressed:
1620+
... compressed.write(data)
1621+
...
1622+
17
14491623
"""
14501624

14511625
def __init__(self, object stream, str compression not None):

0 commit comments

Comments
 (0)