diff --git a/docs/source/src/python/user-guide/io/cloud-storage.py b/docs/source/src/python/user-guide/io/cloud-storage.py
index 12b02df28e61..f0cbe67a9a0c 100644
--- a/docs/source/src/python/user-guide/io/cloud-storage.py
+++ b/docs/source/src/python/user-guide/io/cloud-storage.py
@@ -77,20 +77,39 @@ def get_credentials() -> pl.CredentialProviderFunctionReturn:
 # --8<-- [end:scan_pyarrow_dataset]
 
 # --8<-- [start:write_parquet]
+import polars as pl
+
+df = pl.DataFrame(
+    {
+        "foo": ["a", "b", "c", "d", "d"],
+        "bar": [1, 2, 3, 4, 5],
+    }
+)
+
+destination = "s3://bucket/my_file.parquet"
+
+df.write_parquet(destination)
+# --8<-- [end:write_parquet]
+
+# --8<-- [start:write_file_object]
 import polars as pl
 import s3fs
+import gzip
+
+df = pl.DataFrame(
+    {
+        "foo": ["a", "b", "c", "d", "d"],
+        "bar": [1, 2, 3, 4, 5],
+    }
+)
 
-df = pl.DataFrame({
-    "foo": ["a", "b", "c", "d", "d"],
-    "bar": [1, 2, 3, 4, 5],
-})
+destination = "s3://bucket/my_file.csv.gz"
 
 fs = s3fs.S3FileSystem()
-destination = "s3://bucket/my_file.parquet"
 
-# write parquet
-with fs.open(destination, mode='wb') as f:
-    df.write_parquet(f)
-# --8<-- [end:write_parquet]
+with fs.open(destination, "wb") as cloud_f:
+    with gzip.open(cloud_f, "w") as f:
+        df.write_csv(f)
+# --8<-- [end:write_file_object]
 """
 
diff --git a/docs/source/src/rust/user-guide/io/cloud-storage.rs b/docs/source/src/rust/user-guide/io/cloud-storage.rs
index 2df882a39c00..19fe4e66b815 100644
--- a/docs/source/src/rust/user-guide/io/cloud-storage.rs
+++ b/docs/source/src/rust/user-guide/io/cloud-storage.rs
@@ -44,3 +44,6 @@ async fn main() {
 
 // --8<-- [start:write_parquet]
 // --8<-- [end:write_parquet]
+
+// --8<-- [start:write_file_object]
+// --8<-- [end:write_file_object]
diff --git a/docs/source/user-guide/io/cloud-storage.md b/docs/source/user-guide/io/cloud-storage.md
index f12ad4576ebd..5d1449c02c19 100644
--- a/docs/source/user-guide/io/cloud-storage.md
+++ b/docs/source/user-guide/io/cloud-storage.md
@@ -71,7 +71,12 @@ We first create a PyArrow dataset and then create a `LazyFrame` from the dataset
 
 ## Writing to cloud storage
 
-We can write a `DataFrame` to cloud storage in Python using s3fs for S3, adlfs for Azure Blob
-Storage and gcsfs for Google Cloud Storage. In this example, we write a Parquet file to S3.
+`DataFrame`s can also be written to cloud storage by passing a cloud URL:
 
 {{code_block('user-guide/io/cloud-storage','write_parquet',['write_parquet'])}}
+
+Note that `DataFrame`s can also be written to any Python file object that supports writes. This can
+be helpful for performing operations that are not yet natively supported, e.g. writing a compressed
+CSV directly to cloud:
+
+{{code_block('user-guide/io/cloud-storage','write_file_object',['write_csv'])}}
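
The new `write_file_object` snippet uses s3fs, but the same file-object pattern carries over to the other fsspec-compatible filesystems the previous wording of the page mentioned (adlfs for Azure Blob Storage, gcsfs for Google Cloud Storage). The sketch below is illustrative only and is not part of the patch; it assumes adlfs is installed, and the account and container names are hypothetical placeholders.

```python
# Minimal sketch: write a DataFrame to Azure Blob Storage via a file object.
# Assumes adlfs is installed; account/container names are placeholders.
import adlfs
import polars as pl

df = pl.DataFrame(
    {
        "foo": ["a", "b", "c", "d", "d"],
        "bar": [1, 2, 3, 4, 5],
    }
)

# AzureBlobFileSystem is fsspec-compatible, so fs.open() yields a writable
# file object just like s3fs.S3FileSystem does in the snippet above.
fs = adlfs.AzureBlobFileSystem(account_name="my_account")

destination = "my_container/my_file.parquet"

with fs.open(destination, "wb") as f:
    df.write_parquet(f)
```

Because `write_parquet` and `write_csv` accept any writable file object, the choice of filesystem library only affects how the destination is opened.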