@@ -613,44 +613,34 @@ def _check_batch(block_iter: Iterable[Block], ctx) -> Iterable[Block]:
613613 assert op .completed ()
614614
615615
616- @pytest .mark .parametrize ("use_actors" , [False , True ])
617- @pytest .mark .parametrize ("preserve_order" , [False , True ])
618- @pytest .mark .parametrize (
619- "target_max_block_size,num_expected_blocks" , [(1 , 10 ), (2 ** 20 , 1 ), (None , 1 )]
620- )
621- def test_map_operator_output_unbundling (
616+ def _run_map_operator_test (
622617 ray_start_regular_shared ,
623618 use_actors ,
624619 preserve_order ,
625- target_max_block_size ,
626- num_expected_blocks ,
620+ transform_fn ,
621+ output_block_size_option ,
622+ expected_blocks ,
623+ test_name = "TestMapper" ,
627624):
628- # Tests that the MapOperator's output queue unbundles the bundles returned from
629- # tasks; this facilitates features such as dynamic block splitting.
630- def noop (block_iter : Iterable [Block ], ctx ) -> Iterable [Block ]:
631- for block in block_iter :
632- yield block
633-
625+ """Shared helper for MapOperator tests: feed 10 input blocks through the op and assert the output block count."""
634626 # Create with inputs.
635627 input_op = InputDataBuffer (
636628 DataContext .get_current (), make_ref_bundles ([[i ] for i in range (10 )])
637629 )
638630 compute_strategy = ActorPoolStrategy () if use_actors else TaskPoolStrategy ()
639631
640632 transformer = create_map_transformer_from_block_fn (
641- noop ,
642- output_block_size_option = OutputBlockSizeOption .of (
643- target_max_block_size = target_max_block_size ,
644- ),
633+ transform_fn ,
634+ output_block_size_option = output_block_size_option ,
645635 )
646636
647637 op = MapOperator .create (
648638 transformer ,
649639 input_op = input_op ,
650640 data_context = DataContext .get_current (),
651- name = "TestMapper" ,
641+ name = test_name ,
652642 compute_strategy = compute_strategy ,
653- # Send the everything in a single bundle of 10 blocks.
643+ # Send everything in a single bundle of 10 blocks.
654644 min_rows_per_bundle = 10 ,
655645 )
656646
@@ -670,10 +660,125 @@ def noop(block_iter: Iterable[Block], ctx) -> Iterable[Block]:
670660 outputs = []
671661 while op .has_next ():
672662 outputs .append (op .get_next ())
673- assert len (outputs ) == num_expected_blocks
663+ assert len (outputs ) == expected_blocks
674664 assert op .completed ()
675665
676666
667+ @pytest .mark .parametrize ("use_actors" , [False , True ])
668+ @pytest .mark .parametrize ("preserve_order" , [False , True ])
669+ @pytest .mark .parametrize (
670+ "target_max_block_size,num_expected_blocks" , [(1 , 10 ), (2 ** 20 , 1 ), (None , 1 )]
671+ )
672+ def test_map_operator_output_unbundling (
673+ ray_start_regular_shared ,
674+ use_actors ,
675+ preserve_order ,
676+ target_max_block_size ,
677+ num_expected_blocks ,
678+ ):
679+ """Test that MapOperator's output queue unbundles bundles from tasks."""
680+
681+ def noop (block_iter : Iterable [Block ], ctx ) -> Iterable [Block ]:
682+ for block in block_iter :
683+ yield block
684+
685+ _run_map_operator_test (
686+ ray_start_regular_shared ,
687+ use_actors ,
688+ preserve_order ,
689+ noop ,
690+ OutputBlockSizeOption .of (target_max_block_size = target_max_block_size ),
691+ num_expected_blocks ,
692+ )
693+
694+
695+ @pytest .mark .parametrize ("preserve_order" , [False , True ])
696+ @pytest .mark .parametrize (
697+ "output_block_size_option,expected_blocks" ,
698+ [
699+ # Test target_max_block_size
700+ (OutputBlockSizeOption .of (target_max_block_size = 1 ), 10 ),
701+ (OutputBlockSizeOption .of (target_max_block_size = 2 ** 20 ), 1 ),
702+ (OutputBlockSizeOption .of (target_max_block_size = None ), 1 ),
703+ # Test target_num_rows_per_block
704+ (OutputBlockSizeOption .of (target_num_rows_per_block = 1 ), 10 ),
705+ (OutputBlockSizeOption .of (target_num_rows_per_block = 5 ), 2 ),
706+ (OutputBlockSizeOption .of (target_num_rows_per_block = 10 ), 1 ),
707+ (OutputBlockSizeOption .of (target_num_rows_per_block = None ), 1 ),
708+ # Test disable_block_shaping
709+ (OutputBlockSizeOption .of (disable_block_shaping = True ), 10 ),
710+ (OutputBlockSizeOption .of (disable_block_shaping = False ), 1 ),
711+ # Test combinations
712+ (
713+ OutputBlockSizeOption .of (
714+ target_max_block_size = 1 , target_num_rows_per_block = 5
715+ ),
716+ 10 ,
717+ ),
718+ (
719+ OutputBlockSizeOption .of (
720+ target_max_block_size = 2 ** 20 , disable_block_shaping = True
721+ ),
722+ 10 ,
723+ ),
724+ (
725+ OutputBlockSizeOption .of (
726+ target_num_rows_per_block = 5 , disable_block_shaping = True
727+ ),
728+ 10 ,
729+ ),
730+ ],
731+ )
732+ def test_map_operator_output_block_size_options (
733+ ray_start_regular_shared ,
734+ preserve_order ,
735+ output_block_size_option ,
736+ expected_blocks ,
737+ ):
738+ """Test MapOperator with various OutputBlockSizeOption configurations."""
739+
740+ def noop (block_iter : Iterable [Block ], ctx ) -> Iterable [Block ]:
741+ for block in block_iter :
742+ yield block
743+
744+ _run_map_operator_test (
745+ ray_start_regular_shared ,
746+ use_actors = False ,
747+ preserve_order = preserve_order ,
748+ transform_fn = noop ,
749+ output_block_size_option = output_block_size_option ,
750+ expected_blocks = expected_blocks ,
751+ )
752+
753+
754+ @pytest .mark .parametrize ("preserve_order" , [False , True ])
755+ def test_map_operator_disable_block_shaping_with_batches (
756+ ray_start_regular_shared ,
757+ preserve_order ,
758+ ):
759+ """Test MapOperator with disable_block_shaping=True using batch operations."""
760+
761+ def batch_transform (batch_iter , ctx ):
762+ for batch in batch_iter :
763+ # Simple transformation: add 1 to each value
764+ if hasattr (batch , "to_pandas" ):
765+ df = batch .to_pandas ()
766+ df = df + 1
767+ yield df
768+ else :
769+ yield batch
770+
771+ _run_map_operator_test (
772+ ray_start_regular_shared ,
773+ use_actors = False ,
774+ preserve_order = preserve_order ,
775+ transform_fn = batch_transform ,
776+ output_block_size_option = OutputBlockSizeOption .of (disable_block_shaping = True ),
777+ expected_blocks = 10 , # With disable_block_shaping=True, we expect 10 blocks
778+ test_name = "TestBatchMapper" ,
779+ )
780+
781+
677782@pytest .mark .parametrize ("use_actors" , [False , True ])
678783def test_map_operator_ray_args (shutdown_only , use_actors ):
679784 ray .shutdown ()
0 commit comments