Support passing model parameters in pipelines, closes #500
davidmezzetti committed Jul 8, 2023
1 parent 197f681 commit 4c205c2
Showing 16 changed files with 97 additions and 41 deletions.
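
This change routes extra keyword arguments on pipeline constructors through to the underlying Hugging Face pipeline, splitting them into model arguments and pipeline arguments. A minimal usage sketch, mirroring the new testllm.py test below; torch_dtype is accepted either as a string or as a torch dtype:

    import torch

    from txtai.pipeline import LLM

    # Extra keyword arguments pass through to the underlying Transformers pipeline;
    # string dtypes such as "torch.float32" are resolved to torch dtypes
    llm = LLM("hf-internal-testing/tiny-random-gpt2", task="language-generation", torch_dtype="torch.float32")
    print(llm("Hello, how are"))

    # Equivalent call using a torch dtype object
    llm = LLM("hf-internal-testing/tiny-random-gpt2", task="language-generation", torch_dtype=torch.float32)
    print(llm("Hello, how are"))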
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/audio/transcription.py
@@ -19,12 +19,12 @@ class Transcription(HFPipeline):
     Transcribes audio files or data to text.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None):
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
         if not SOUNDFILE:
             raise ImportError("SoundFile library not installed or libsndfile not found")
 
         # Call parent constructor
-        super().__init__("automatic-speech-recognition", path, quantize, gpu, model)
+        super().__init__("automatic-speech-recognition", path, quantize, gpu, model, **kwargs)
 
     def __call__(self, audio, rate=None, chunk=10, join=True):
         """
41 changes: 37 additions & 4 deletions src/python/txtai/pipeline/hfpipeline.py
@@ -2,9 +2,13 @@
 Hugging Face Transformers pipeline wrapper module
 """
 
+import inspect
+
 from transformers import pipeline
 
 from ..models import Models
+from ..util import Resolver
 
 from .tensors import Tensors
 
@@ -14,7 +18,7 @@ class HFPipeline(Tensors):
     quantization and minor interface changes.
     """
 
-    def __init__(self, task, path=None, quantize=False, gpu=False, model=None):
+    def __init__(self, task, path=None, quantize=False, gpu=False, model=None, **kwargs):
         """
         Loads a new pipeline model.
 
@@ -25,23 +29,30 @@ def __init__(self, task, path=None, quantize=False, gpu=False, model=None):
             quantize: if model should be quantized, defaults to False
             gpu: True/False if GPU should be enabled, also supports a GPU device id
             model: optional existing pipeline model to wrap
+            kwargs: additional keyword arguments to pass to pipeline model
         """
 
         if model:
             # Check if input model is a Pipeline or a HF pipeline
             self.pipeline = model.pipeline if isinstance(model, HFPipeline) else model
         else:
             # Get device id
-            deviceid = Models.deviceid(gpu)
+            deviceid = Models.deviceid(gpu) if "device_map" not in kwargs else None
+
+            # Split into model args, pipeline args
+            modelargs, kwargs = self.parseargs(**kwargs)
 
             # Transformer pipeline task
             if isinstance(path, (list, tuple)):
                 # Derive configuration, if possible
                 config = path[1] if path[1] and isinstance(path[1], str) else None
 
-                self.pipeline = pipeline(task, model=Models.load(path[0], config, task), tokenizer=path[1], device=deviceid)
+                # Load model
+                model = Models.load(path[0], config, task)
+
+                self.pipeline = pipeline(task, model=model, tokenizer=path[1], device=deviceid, model_kwargs=modelargs, **kwargs)
             else:
-                self.pipeline = pipeline(task, model=path, device=deviceid)
+                self.pipeline = pipeline(task, model=path, device=deviceid, model_kwargs=modelargs, **kwargs)
 
         # Model quantization. Compresses model to int8 precision, improves runtime performance. Only supported on CPU.
         if deviceid == -1 and quantize:
@@ -51,6 +62,28 @@ def __init__(self, task, path=None, quantize=False, gpu=False, model=None):
         # Detect unbounded tokenizer typically found in older models
         Models.checklength(self.pipeline.model, self.pipeline.tokenizer)
 
+    def parseargs(self, **kwargs):
+        """
+        Inspects the pipeline method and splits kwargs into model args and pipeline args.
+
+        Args:
+            kwargs: all keyword arguments
+
+        Returns:
+            (model args, pipeline args)
+        """
+
+        # Get pipeline method arguments
+        args = inspect.getfullargspec(pipeline).args
+
+        # Resolve torch dtype, if necessary
+        dtype = kwargs.get("torch_dtype")
+        if dtype and isinstance(dtype, str) and dtype != "auto":
+            kwargs["torch_dtype"] = Resolver()(dtype)
+
+        # Split into modelargs and kwargs
+        return ({arg: value for arg, value in kwargs.items() if arg not in args}, {arg: value for arg, value in kwargs.items() if arg in args})
+
     def maxlength(self):
         """
         Gets the max length to use for generate calls.
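
The parseargs method added above decides the split by inspecting the transformers pipeline() signature: keywords named in that signature are passed to pipeline() directly, everything else becomes model_kwargs for the model itself. A rough standalone sketch of the same idea, assuming a recent transformers version (exact signature names vary by release):

    import inspect

    from transformers import pipeline

    # Named arguments accepted by the transformers pipeline factory
    PIPELINEARGS = inspect.getfullargspec(pipeline).args

    def split(**kwargs):
        # Keywords outside the pipeline() signature route to model_kwargs
        modelargs = {k: v for k, v in kwargs.items() if k not in PIPELINEARGS}
        pipelineargs = {k: v for k, v in kwargs.items() if k in PIPELINEARGS}
        return modelargs, pipelineargs

    # torch_dtype is typically a pipeline() argument, low_cpu_mem_usage is not,
    # so low_cpu_mem_usage would be forwarded to the model as a model kwarg
    print(split(torch_dtype="auto", low_cpu_mem_usage=True))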
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/image/caption.py
@@ -18,12 +18,12 @@ class Caption(HFPipeline):
     Constructs captions for images.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None):
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
         if not PIL:
             raise ImportError('Captions pipeline is not available - install "pipeline" extra to enable')
 
         # Call parent constructor
-        super().__init__("image-to-text", path, quantize, gpu, model)
+        super().__init__("image-to-text", path, quantize, gpu, model, **kwargs)
 
     def __call__(self, images):
         """
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/image/objects.py
@@ -18,11 +18,11 @@ class Objects(HFPipeline):
     Applies object detection models to images. Supports both object detection models and image classification models.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None, classification=False, threshold=0.9):
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, classification=False, threshold=0.9, **kwargs):
         if not PIL:
             raise ImportError('Objects pipeline is not available - install "pipeline" extra to enable')
 
-        super().__init__("image-classification" if classification else "object-detection", path, quantize, gpu, model)
+        super().__init__("image-classification" if classification else "object-detection", path, quantize, gpu, model, **kwargs)
 
         self.classification = classification
         self.threshold = threshold
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/text/crossencoder.py
@@ -12,8 +12,8 @@ class CrossEncoder(HFPipeline):
     Computes similarity between query and list of text using a cross-encoder model
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None):
-        super().__init__("text-classification", path, quantize, gpu, model)
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
+        super().__init__("text-classification", path, quantize, gpu, model, **kwargs)
 
     def __call__(self, query, texts, multilabel=True, workers=0):
         """
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/text/entity.py
@@ -10,8 +10,8 @@ class Entity(HFPipeline):
     Applies a token classifier to text and extracts entity/label combinations.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None):
-        super().__init__("token-classification", path, quantize, gpu, model)
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
+        super().__init__("token-classification", path, quantize, gpu, model, **kwargs)
 
     def __call__(self, text, labels=None, aggregate="simple", flatten=None, join=False, workers=0):
         """
17 changes: 10 additions & 7 deletions src/python/txtai/pipeline/text/extractor.py
@@ -34,6 +34,7 @@ def __init__(
         context=None,
         task=None,
         output="default",
+        **kwargs,
     ):
         """
         Builds a new extractor.
@@ -50,13 +51,14 @@ def __init__(
             context: topn context matches to include, defaults to 3
             task: model task (language-generation, sequence-sequence or question-answering), defaults to auto-detect
             output: output format, 'default' returns (name, answer), 'flatten' returns answers and 'reference' returns (name, answer, reference)
+            kwargs: additional keyword arguments to pass to pipeline model
         """
 
         # Similarity instance
         self.similarity = similarity
 
         # Question-Answer model. Can be prompt-driven LLM or extractive qa
-        self.model = self.load(path, quantize, gpu, model, task)
+        self.model = self.load(path, quantize, gpu, model, task, **kwargs)
 
         # Tokenizer class use default method if not set
         self.tokenizer = tokenizer if tokenizer else Tokenizer() if hasattr(self.similarity, "scoring") and self.similarity.scoring else None
@@ -119,7 +121,7 @@ def __call__(self, queue, texts=None):
         # Apply output formatting to answers and return
         return self.apply(inputs, queries, answers, topns)
 
-    def load(self, path, quantize, gpu, model, task):
+    def load(self, path, quantize, gpu, model, task, **kwargs):
         """
         Loads a question-answer model.
 
@@ -129,24 +131,25 @@ def load(self, path, quantize, gpu, model, task):
             gpu: if gpu inference should be used (only works if GPUs are available)
             model: optional existing pipeline model to wrap
             task: model task (language-generation, sequence-sequence or question-answering), defaults to auto-detect
+            kwargs: additional keyword arguments to pass to pipeline model
 
         Returns:
             Generator, Sequences, Questions or custom pipeline
         """
 
-        # Check if path is already a pipeline
-        if isinstance(path, Pipeline):
+        # Only try to load if path is a string
+        if not isinstance(path, str):
             return path
 
         # Attempt to resolve task if not provided
-        task = task if task else Models.task(path)
+        task = task if task else Models.task(path, **kwargs)
 
         # Load model as Question pipeline
        if task == "question-answering":
-            return Questions(path, quantize, gpu, model)
+            return Questions(path, quantize, gpu, model, **kwargs)
 
         # Load model as LLM pipeline
-        return LLM(path, quantize, gpu, model, task)
+        return LLM(path, quantize, gpu, model, task, **kwargs)
 
     def query(self, queries, texts):
         """
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/text/generator.py
@@ -10,5 +10,5 @@ class Generator(LLM):
     Generate text with a causal language model.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None):
-        super().__init__(path, quantize, gpu, model, "language-generation")
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
+        super().__init__(path, quantize, gpu, model, "language-generation", **kwargs)
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/text/labels.py
@@ -10,8 +10,8 @@ class Labels(HFPipeline):
     Applies a text classifier to text. Supports zero shot and standard text classification models
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None, dynamic=True):
-        super().__init__("zero-shot-classification" if dynamic else "text-classification", path, quantize, gpu, model)
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, dynamic=True, **kwargs):
+        super().__init__("zero-shot-classification" if dynamic else "text-classification", path, quantize, gpu, model, **kwargs)
 
         # Set if labels are dynamic (zero shot) or fixed (standard text classification)
         self.dynamic = dynamic
12 changes: 8 additions & 4 deletions src/python/txtai/pipeline/text/llm.py
@@ -13,8 +13,11 @@ class LLM(HFPipeline):
     sequence to sequence model.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None, task=None):
-        super().__init__(self.task(path, task), path if path else "google/flan-t5-base", quantize, gpu, model)
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, task=None, **kwargs):
+        super().__init__(self.task(path, task, **kwargs), path if path else "google/flan-t5-base", quantize, gpu, model, **kwargs)
+
+        # Load tokenizer, if necessary
+        self.pipeline.tokenizer = self.pipeline.tokenizer if self.pipeline.tokenizer else Models.tokenizer(path, **kwargs)
 
     def __call__(self, text, prefix=None, maxlength=512, workers=0, **kwargs):
         """
@@ -70,13 +73,14 @@ def clean(self, prompt, result):
         # Apply text cleaning rules
         return text.replace("$=", "<=").strip()
 
-    def task(self, path, task):
+    def task(self, path, task, **kwargs):
         """
         Get the pipeline task name.
 
         Args:
             path: model path input
             task: task name
+            kwargs: optional additional keyword arguments
 
         Returns:
             pipeline task name
@@ -87,7 +91,7 @@ def task(self, path, task):
 
         # Attempt to resolve task
         if path and not task:
-            task = Models.task(path)
+            task = Models.task(path, **kwargs)
 
         # Map to Hugging Face task. Default to text2text-generation pipeline when task not resolved.
         return mapping.get(task, "text2text-generation")
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/text/questions.py
@@ -10,8 +10,8 @@ class Questions(HFPipeline):
     Runs extractive QA for a series of questions and contexts.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None):
-        super().__init__("question-answering", path, quantize, gpu, model)
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
+        super().__init__("question-answering", path, quantize, gpu, model, **kwargs)
 
     def __call__(self, questions, contexts, workers=0):
         """
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/text/sequences.py
@@ -10,5 +10,5 @@ class Sequences(LLM):
     Runs text through a sequence-sequence model.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None):
-        super().__init__(path, quantize, gpu, model, "sequence-sequence")
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
+        super().__init__(path, quantize, gpu, model, "sequence-sequence", **kwargs)
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/text/similarity.py
@@ -13,9 +13,9 @@ class Similarity(Labels):
     Computes similarity between query and list of text using a text classifier.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None, dynamic=True, crossencode=False):
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, dynamic=True, crossencode=False, **kwargs):
         # Use zero-shot classification if dynamic is True and crossencode is False, otherwise use standard text classification
-        super().__init__(path, quantize, gpu, model, False if crossencode else dynamic)
+        super().__init__(path, quantize, gpu, model, False if crossencode else dynamic, **kwargs)
 
         # Load as a cross-encoder if crossencode set to True
         self.crossencoder = CrossEncoder(model=self.pipeline) if crossencode else None
4 changes: 2 additions & 2 deletions src/python/txtai/pipeline/text/summary.py
@@ -12,8 +12,8 @@ class Summary(HFPipeline):
     Summarizes text.
     """
 
-    def __init__(self, path=None, quantize=False, gpu=True, model=None):
-        super().__init__("summarization", path, quantize, gpu, model)
+    def __init__(self, path=None, quantize=False, gpu=True, model=None, **kwargs):
+        super().__init__("summarization", path, quantize, gpu, model, **kwargs)
 
     def __call__(self, text, minlength=None, maxlength=None, workers=0):
         """
4 changes: 2 additions & 2 deletions test/python/testpipeline/testgenerator.py
@@ -20,5 +20,5 @@ def testGeneration(self):
         model = Generator("hf-internal-testing/tiny-random-gpt2")
         start = "Hello, how are"
 
-        # Test that text is generator
-        self.assertGreater(len(model(start)), len(start))
+        # Test that text is generated
+        self.assertIsNotNone(model(start))
20 changes: 18 additions & 2 deletions test/python/testpipeline/testllm.py
@@ -4,6 +4,8 @@
 
 import unittest
 
+import torch
+
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from txtai.pipeline import LLM
@@ -14,6 +16,20 @@ class TestLLM(unittest.TestCase):
     LLM tests.
     """
 
+    def testArguments(self):
+        """
+        Test pipeline keyword arguments
+        """
+
+        start = "Hello, how are"
+
+        # Test that text is generated with custom parameters
+        model = LLM("hf-internal-testing/tiny-random-gpt2", task="language-generation", torch_dtype="torch.float32")
+        self.assertIsNotNone(model(start))
+
+        model = LLM("hf-internal-testing/tiny-random-gpt2", task="language-generation", torch_dtype=torch.float32)
+        self.assertIsNotNone(model(start))
+
     def testExternal(self):
         """
         Test externally loaded model
@@ -25,5 +41,5 @@ def testExternal(self):
         model = LLM((model, tokenizer))
         start = "Hello, how are"
 
-        # Test that text is generator
-        self.assertGreater(len(model(start)), len(start))
+        # Test that text is generated
+        self.assertIsNotNone(model(start))
