Skip to content

Commit

Permalink
Update examples/loom2parquetmerge.py
Browse files (browse the repository at this point in the history)
Co-authored-by: codiumai-pr-agent-pro[bot] <151058649+codiumai-pr-agent-pro[bot]@users.noreply.github.com>
  • Loading branch information
ypriverol and codiumai-pr-agent-pro[bot] authored Sep 25, 2024
1 parent e2f7b9c commit 5a91f14
Showing 1 changed file with 2 additions and 8 deletions.
10 changes: 2 additions & 8 deletions examples/loom2parquetmerge.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,7 +28,7 @@ def concatenate_parquet_files_incremental(files_paths, output_path, batch_size=1
:param batch_size: Number of rows to read from each file at a time.
"""
writer = None

with pq.ParquetWriter(output_path, schema=None, compression='gzip') as writer:
for file_path in files_paths:
print(f"Processing file: {file_path}")
parquet_file = pq.ParquetFile(file_path)
Expand All @@ -38,16 +38,10 @@ def concatenate_parquet_files_incremental(files_paths, output_path, batch_size=1
# Convert the batch to a PyArrow Table
table = pa.Table.from_batches([batch])

# If the writer is not initialized, create a new Parquet writer
if writer is None:
writer = pq.ParquetWriter(output_path, table.schema, compression='gzip')

# Write the batch to the output Parquet file
writer.write_table(table)

# Close the writer after all batches are written
if writer is not None:
writer.close()
print(f"Concatenated parquet file written to {output_path}")
print(f"Concatenated parquet file written to {output_path}")


Expand Down

0 comments on commit 5a91f14

Please sign in to comment.