diff --git a/CHANGES.md b/CHANGES.md
index 62e090a87324..e694e0c0d2f3 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -71,6 +71,8 @@
 * Prism is a portable runner that executes each transform independently, ensuring coders are exercised between them.
 * At this point it supersedes the Go direct runner in functionality. The Go direct runner is now deprecated.
 * See https://github.com/apache/beam/blob/master/sdks/go/pkg/beam/runners/prism/README.md for the goals and features of Prism.
+* Hugging Face Model Handler for RunInference added to Python SDK. ([#26632](https://github.com/apache/beam/pull/26632))
+* Hugging Face Pipelines support for RunInference added to Python SDK. ([#27399](https://github.com/apache/beam/pull/27399))
 
 ## Breaking Changes
 
diff --git a/sdks/python/apache_beam/ml/inference/huggingface_inference.py b/sdks/python/apache_beam/ml/inference/huggingface_inference.py
index 09201d3b080b..57eb86d84820 100644
--- a/sdks/python/apache_beam/ml/inference/huggingface_inference.py
+++ b/sdks/python/apache_beam/ml/inference/huggingface_inference.py
@@ -74,7 +74,8 @@ class PipelineTask(str, Enum):
   """
-  PipelineTask lists all the tasks supported by the Hugging Face Pipelines.
+  PipelineTask defines all the tasks supported by the Hugging Face Pipelines
+  listed at https://huggingface.co/docs/transformers/main_classes/pipelines.
   Only these tasks can be passed to HuggingFacePipelineModelHandler.
   """
   AudioClassification = 'audio-classification'
@@ -626,6 +627,7 @@ def __init__(
     _validate_constructor_args_hf_pipeline(self._task, self._model)
 
   def load_model(self):
+    """Loads and initializes the pipeline for processing."""
     return pipeline(
         task=self._task, model=self._model, **self._load_pipeline_args)
 
@@ -664,6 +666,10 @@ def update_model_path(self, model_path: Optional[str] = None):
     self._model = model_path if model_path else self._model
 
   def get_num_bytes(self, batch: Sequence[str]) -> int:
+    """
+    Returns:
+      The number of bytes of input batch elements.
+    """
     return sum(sys.getsizeof(element) for element in batch)
 
   def batch_elements_kwargs(self):
@@ -673,4 +679,8 @@ def share_model_across_processes(self) -> bool:
     return self._large_model
 
   def get_metrics_namespace(self) -> str:
+    """
+    Returns:
+      A namespace for metrics collected by the RunInference transform.
+    """
     return 'BeamML_HuggingFacePipelineModelHandler'
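
For reference, a minimal usage sketch of the new handler with RunInference (not part of the diff). The task string, model checkpoint, and input text are illustrative assumptions; only `HuggingFacePipelineModelHandler`, `RunInference`, and `PredictionResult` come from the Beam SDK touched above.

```python
import apache_beam as beam
from apache_beam.ml.inference.base import RunInference
from apache_beam.ml.inference.huggingface_inference import (
    HuggingFacePipelineModelHandler)

# Assumed task and model checkpoint, chosen only for illustration. Per the
# docstring above, the task must correspond to one of the PipelineTask values
# (a str Enum), so the matching task string is used here.
model_handler = HuggingFacePipelineModelHandler(
    task='text-classification',
    model='distilbert-base-uncased-finetuned-sst-2-english')

with beam.Pipeline() as p:
  _ = (
      p
      | 'CreateExamples' >> beam.Create(
          ['RunInference with Hugging Face Pipelines is convenient.'])
      | 'RunInference' >> RunInference(model_handler)
      # RunInference emits PredictionResult elements; .inference carries the
      # pipeline's output for each input example.
      | 'PrintResults' >> beam.Map(lambda result: print(result.inference)))
```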