@@ -36,18 +36,20 @@ def create_table_with_batches(num_batches, rows_per_batch):
3636# Test generate_write_requests with different numbers of batches in the input table.
3737# The total number of rows in the generated table is always 1000000.
3838@pytest .mark .parametrize (
39- "num_batches, rows_per_batch" ,
39+ "num_batches, rows_per_batch, expected_requests " ,
4040 [
41- (1 , 1000000 ),
42- (10 , 100000 ),
43- (100 , 10000 ),
44- (1000 , 1000 ),
45- (10000 , 100 ),
46- (100000 , 10 ),
47- (1000000 , 1 ),
41+ (1 , 1000000 , 32 ),
42+ (10 , 100000 , 40 ),
43+ (100 , 10000 , 34 ),
44+ (1000 , 1000 , 26 ),
45+ (10000 , 100 , 26 ),
46+ (100000 , 10 , 26 ),
47+ (1000000 , 1 , 26 ),
4848 ],
4949)
50- def test_generate_write_requests_varying_batches (num_batches , rows_per_batch ):
50+ def test_generate_write_requests_varying_batches (
51+ num_batches , rows_per_batch , expected_requests
52+ ):
5153 """Test generate_write_requests with different numbers of batches in the input table."""
5254 # Create a table that returns `num_batches` when to_batches() is called.
5355 table = create_table_with_batches (num_batches , rows_per_batch )
@@ -63,15 +65,17 @@ def test_generate_write_requests_varying_batches(num_batches, rows_per_batch):
6365 f"\n Time used to generate requests for { num_batches } batches: { end_time - start_time :.4f} seconds"
6466 )
6567
66- # We expect the requests to be aggregated until 7MB.
67- # Since the row number is constant, the number of requests should be deterministic.
68- assert len (requests ) == 26
68+ assert len (requests ) == expected_requests
6969
7070 # Verify total rows in requests matches total rows in table
7171 total_rows_processed = 0
7272 for request in requests :
7373 # Deserialize the batch from the request to count rows
7474 serialized_batch = request .arrow_rows .rows .serialized_record_batch
75+
76+ # Verify the serialized batch does not exceed 7 MiB (7 * 1024 * 1024 bytes)
77+ assert len (serialized_batch ) <= 7 * 1024 * 1024
78+
7579 # We need a schema to read the batch. The schema is PYARROW_SCHEMA.
7680 batch = pa .ipc .read_record_batch (
7781 serialized_batch , append_rows_with_arrow .PYARROW_SCHEMA
0 commit comments