Skip to content

Commit ff62312

Browse files
authored
[Data] Make internal UDF names more descriptive (#44985)
APIs like `select_columns` call `map_batches` under the hood and use functions with non-descriptive names. For example, if you call `select_columns`, you'd see something like this in the progress bar:
```
ReadRange->MapBatches(fn)
```
To prevent confusion (e.g., what is `fn`?), this PR makes the function names more descriptive.
1 parent 10f7f2a commit ff62312

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

python/ray/data/dataset.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -647,15 +647,15 @@ def add_column(
647647
ray (e.g., num_gpus=1 to request GPUs for the map tasks).
648648
"""
649649

650-
def process_batch(batch: "pandas.DataFrame") -> "pandas.DataFrame":
650+
def add_column(batch: "pandas.DataFrame") -> "pandas.DataFrame":
651651
batch.loc[:, col] = fn(batch)
652652
return batch
653653

654654
if not callable(fn):
655655
raise ValueError("`fn` must be callable, got {}".format(fn))
656656

657657
return self.map_batches(
658-
process_batch,
658+
add_column,
659659
batch_format="pandas", # TODO(ekl) we should make this configurable.
660660
compute=compute,
661661
concurrency=concurrency,
@@ -761,11 +761,11 @@ def select_columns(
761761
ray (e.g., num_gpus=1 to request GPUs for the map tasks).
762762
""" # noqa: E501
763763

764-
def fn(batch):
764+
def select_columns(batch):
765765
return BlockAccessor.for_block(batch).select(columns=cols)
766766

767767
return self.map_batches(
768-
fn,
768+
select_columns,
769769
batch_format="pandas",
770770
zero_copy_batch=True,
771771
compute=compute,
@@ -1119,7 +1119,7 @@ def random_sample(
11191119
if seed is not None:
11201120
random.seed(seed)
11211121

1122-
def process_batch(batch):
1122+
def random_sample(batch):
11231123
if isinstance(batch, list):
11241124
return [row for row in batch if random.random() <= fraction]
11251125
if isinstance(batch, pa.Table):
@@ -1135,7 +1135,7 @@ def process_batch(batch):
11351135
)
11361136
raise ValueError(f"Unsupported batch type: {type(batch)}")
11371137

1138-
return self.map_batches(process_batch, batch_format=None)
1138+
return self.map_batches(random_sample, batch_format=None)
11391139

11401140
@ConsumptionAPI
11411141
def streaming_split(

0 commit comments

Comments
 (0)