-
Notifications
You must be signed in to change notification settings - Fork 4k
GH-31507: [Python] Address docstrings in Streams and File Access (Stream Classes) #33698
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
cb18aba
Add docstrings for OSFile and PythonFile
AlenkaF 2042d74
Add docstrings for BufferReader, BufferOutputStream and FixedSizeBuff…
AlenkaF e06ace5
Add docstrings to MemoryMappedFile, CompressedInputStream and Compres…
AlenkaF e9777aa
Update python/pyarrow/io.pxi
AlenkaF ea1da22
Update MemoryMappedFile class dosctring example
AlenkaF 61d18cf
Update OSFile class docstring example
AlenkaF 67eed84
Add a summary for BufferOutputStream class docstrings
AlenkaF 10d5e89
Use pa.input_stream and pa.output_stream as preferred way
AlenkaF 9c2185a
Change BufferOutputStream docstrings
AlenkaF 19e8359
Update docstring example for CompressedInputStream
AlenkaF File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -782,6 +782,34 @@ cdef class PythonFile(NativeFile): | |
| >>> import pyarrow as pa | ||
| >>> pa.PythonFile(io.BytesIO()) | ||
| <pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False> | ||
|
|
||
| Create a stream for writing: | ||
|
|
||
| >>> buf = io.BytesIO() | ||
| >>> f = pa.PythonFile(buf, mode='w') | ||
| >>> f.writable() | ||
| True | ||
| >>> f.write(b'PythonFile') | ||
| 10 | ||
| >>> buf.getvalue() | ||
| b'PythonFile' | ||
| >>> f.close() | ||
| >>> f | ||
| <pyarrow.PythonFile closed=True own_file=False is_seekable=False is_writable=True is_readable=False> | ||
|
|
||
| Create a stream for reading: | ||
|
|
||
| >>> buf = io.BytesIO(b'PythonFile') | ||
| >>> f = pa.PythonFile(buf, mode='r') | ||
| >>> f.mode | ||
| 'rb' | ||
| >>> f.read() | ||
| b'PythonFile' | ||
| >>> f | ||
| <pyarrow.PythonFile closed=False own_file=False is_seekable=True is_writable=False is_readable=True> | ||
| >>> f.close() | ||
| >>> f | ||
| <pyarrow.PythonFile closed=True own_file=False is_seekable=True is_writable=False is_readable=True> | ||
| """ | ||
| cdef: | ||
| object handle | ||
|
|
@@ -851,6 +879,23 @@ cdef class MemoryMappedFile(NativeFile): | |
| A stream that represents a memory-mapped file. | ||
|
|
||
| Supports 'r', 'r+', 'w' modes. | ||
|
|
||
| Examples | ||
| -------- | ||
| Create a new file with memory map: | ||
|
|
||
| >>> import pyarrow as pa | ||
| >>> mmap = pa.create_memory_map('example_mmap.dat', 10) | ||
| >>> mmap | ||
| <pyarrow.MemoryMappedFile closed=False own_file=False is_seekable=True is_writable=True is_readable=True> | ||
| >>> mmap.close() | ||
|
|
||
| Open an existing file with memory map: | ||
|
|
||
| >>> with pa.memory_map('example_mmap.dat') as mmap: | ||
| ... mmap | ||
| ... | ||
| <pyarrow.MemoryMappedFile closed=False own_file=False is_seekable=True is_writable=False is_readable=True> | ||
| """ | ||
| cdef: | ||
| shared_ptr[CMemoryMappedFile] handle | ||
|
|
@@ -978,6 +1023,34 @@ def create_memory_map(path, size): | |
| cdef class OSFile(NativeFile): | ||
| """ | ||
| A stream backed by a regular file descriptor. | ||
|
|
||
| Examples | ||
| -------- | ||
| Create a new file to write to: | ||
|
|
||
| >>> import pyarrow as pa | ||
| >>> with pa.OSFile('example_osfile.arrow', mode='w') as f: | ||
| ... f.writable() | ||
| ... f.write(b'OSFile') | ||
| ... f.seekable() | ||
| ... | ||
| True | ||
| 6 | ||
| False | ||
|
|
||
| Open the file to read: | ||
|
|
||
| >>> with pa.OSFile('example_osfile.arrow', mode='r') as f: | ||
| ... f.mode | ||
| ... f.read() | ||
| ... | ||
| 'rb' | ||
| b'OSFile' | ||
|
|
||
| Inspect created OSFile: | ||
|
|
||
| >>> pa.OSFile('example_osfile.arrow') | ||
| <pyarrow.OSFile closed=False own_file=False is_seekable=True is_writable=False is_readable=True> | ||
| """ | ||
| cdef: | ||
| object path | ||
|
|
@@ -1020,6 +1093,26 @@ cdef class OSFile(NativeFile): | |
| cdef class FixedSizeBufferWriter(NativeFile): | ||
| """ | ||
| A stream writing to an Arrow buffer. | ||
|
|
||
| Examples | ||
| -------- | ||
| Create a stream to write to ``pyarrow.Buffer``: | ||
|
|
||
| >>> import pyarrow as pa | ||
| >>> buf = pa.allocate_buffer(5) | ||
| >>> with pa.output_stream(buf) as stream: | ||
| ... stream.write(b'abcde') | ||
| ... stream | ||
| ... | ||
| 5 | ||
| <pyarrow.FixedSizeBufferWriter closed=False own_file=False is_seekable=False is_writable=True is_readable=False> | ||
|
|
||
| Inspect the buffer: | ||
|
|
||
| >>> buf.to_pybytes() | ||
| b'abcde' | ||
| >>> buf | ||
| <pyarrow.Buffer address=... size=5 is_cpu=True is_mutable=True> | ||
| """ | ||
|
|
||
| def __cinit__(self, Buffer buffer): | ||
|
|
@@ -1327,6 +1420,27 @@ def allocate_buffer(int64_t size, MemoryPool memory_pool=None, | |
|
|
||
|
|
||
| cdef class BufferOutputStream(NativeFile): | ||
| """ | ||
| An output stream that writes to a resizable buffer. | ||
|
|
||
| The buffer is produced as a result when ``getvalue()`` is called. | ||
|
|
||
| Examples | ||
AlenkaF marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| -------- | ||
| Create an output stream, write data to it and finalize it with | ||
| ``getvalue()``: | ||
|
|
||
| >>> import pyarrow as pa | ||
| >>> f = pa.BufferOutputStream() | ||
| >>> f.write(b'pyarrow.Buffer') | ||
| 14 | ||
| >>> f.closed | ||
| False | ||
| >>> f.getvalue() | ||
| <pyarrow.Buffer address=... size=14 is_cpu=True is_mutable=True> | ||
| >>> f.closed | ||
| True | ||
| """ | ||
|
|
||
| cdef: | ||
| shared_ptr[CResizableBuffer] buffer | ||
|
|
@@ -1368,6 +1482,24 @@ cdef class BufferReader(NativeFile): | |
| Parameters | ||
| ---------- | ||
| obj : Python bytes or pyarrow.Buffer | ||
|
|
||
| Examples | ||
| -------- | ||
| Create an Arrow input stream and inspect it: | ||
|
|
||
| >>> import pyarrow as pa | ||
| >>> data = b'reader data' | ||
| >>> buf = memoryview(data) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the memoryview(..) needed?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It needs some kind of a source (str, Path, buffer, file-like object, …): >>> import pyarrow as pa
>>> data = b'reader data'
>>> with pa.input_stream(data) as stream:
... stream.size()
...
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "pyarrow/io.pxi", line 2403, in pyarrow.lib.input_stream
hasattr(source, 'closed')):
TypeError: pa.input_stream() called with instance of '<class 'bytes'>'
Can change the source, if preferred. |
||
| >>> with pa.input_stream(buf) as stream: | ||
| ... stream.size() | ||
| ... stream.read(6) | ||
| ... stream.seek(7) | ||
| ... stream.read(15) | ||
| ... | ||
| 11 | ||
| b'reader' | ||
| 7 | ||
| b'data' | ||
| """ | ||
| cdef: | ||
| Buffer buffer | ||
|
|
@@ -1393,6 +1525,36 @@ cdef class CompressedInputStream(NativeFile): | |
| Input stream object to wrap with the compression. | ||
| compression : str | ||
| The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd"). | ||
|
|
||
| Examples | ||
| -------- | ||
| Create an output stream which compresses the data: | ||
|
|
||
| >>> import pyarrow as pa | ||
| >>> data = b"Compressed stream" | ||
| >>> raw = pa.BufferOutputStream() | ||
| >>> with pa.CompressedOutputStream(raw, "gzip") as compressed: | ||
| ... compressed.write(data) | ||
| ... | ||
| 17 | ||
|
|
||
| Create an input stream with decompression referencing the | ||
| buffer with compressed data: | ||
|
|
||
| >>> cdata = raw.getvalue() | ||
| >>> with pa.input_stream(cdata, compression="gzip") as compressed: | ||
| ... compressed.read() | ||
| ... | ||
| b'Compressed stream' | ||
|
|
||
| which actually translates to the use of ``BufferReader`` and | ||
| ``CompressedInputStream``: | ||
|
|
||
| >>> raw = pa.BufferReader(cdata) | ||
| >>> with pa.CompressedInputStream(raw, "gzip") as compressed: | ||
| ... compressed.read() | ||
| ... | ||
| b'Compressed stream' | ||
| """ | ||
|
|
||
| def __init__(self, object stream, str compression not None): | ||
|
|
@@ -1420,6 +1582,18 @@ cdef class CompressedOutputStream(NativeFile): | |
| Output stream object to wrap with the compression. | ||
| compression : str | ||
| The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd"). | ||
|
|
||
| Examples | ||
| -------- | ||
| Create an output stream which compresses the data: | ||
|
|
||
| >>> import pyarrow as pa | ||
| >>> data = b"Compressed stream" | ||
| >>> raw = pa.BufferOutputStream() | ||
| >>> with pa.CompressedOutputStream(raw, "gzip") as compressed: | ||
| ... compressed.write(data) | ||
| ... | ||
| 17 | ||
| """ | ||
|
|
||
| def __init__(self, object stream, str compression not None): | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.