Refactor Sequences and Generator pipeline into single LLM pipeline, closes #494
davidmezzetti committed Jul 2, 2023
1 parent a833eef commit 847b7e4
Showing 7 changed files with 179 additions and 95 deletions.
35 changes: 34 additions & 1 deletion src/python/txtai/models/models.py
@@ -6,7 +6,7 @@

import torch

-from transformers import AutoModel, AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
+from transformers import AutoConfig, AutoModel, AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification

from .onnx import OnnxModel

@@ -184,3 +184,36 @@ def load(path, config=None, task="default"):

        # Load model for supported tasks. Return path for unsupported tasks.
        return models[task](path) if task in models else path

+    @staticmethod
+    def task(path):
+        """
+        Attempts to detect the model task from path.
+
+        Args:
+            path: path to model
+
+        Returns:
+            inferred model task
+        """
+
+        # Get model configuration
+        config = None
+        if isinstance(path, (list, tuple)) and hasattr(path[0], "config"):
+            config = path[0].config
+        elif isinstance(path, str):
+            config = AutoConfig.from_pretrained(path)
+
+        # Attempt to resolve task using configuration
+        task = None
+        if config:
+            architecture = config.architectures[0] if config.architectures else None
+            if architecture:
+                if any(x for x in ["LMHead", "CausalLM"] if x in architecture):
+                    task = "language-generation"
+                elif "QuestionAnswering" in architecture:
+                    task = "question-answering"
+                elif "ConditionalGeneration" in architecture:
+                    task = "sequence-sequence"
+
+        return task
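For reference, a small sketch of how this detection resolves common checkpoints (the model names are illustrative public Hugging Face checkpoints; downloading their configurations requires network access):

from txtai.models import Models

# Architectures containing "LMHead" or "CausalLM" (e.g. GPT2LMHeadModel) resolve to language-generation
print(Models.task("gpt2"))                  # language-generation

# Architectures containing "ConditionalGeneration" (e.g. T5ForConditionalGeneration) resolve to sequence-sequence
print(Models.task("google/flan-t5-base"))   # sequence-sequence

# Inputs without a resolvable configuration return None
print(Models.task(None))                    # None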
1 change: 1 addition & 0 deletions src/python/txtai/pipeline/text/__init__.py
@@ -7,6 +7,7 @@
from .extractor import Extractor
from .generator import Generator
from .labels import Labels
+from .llm import LLM
from .questions import Questions
from .sequences import Sequences
from .similarity import Similarity
38 changes: 15 additions & 23 deletions src/python/txtai/pipeline/text/extractor.py
@@ -2,22 +2,22 @@
Extractor module
"""

-from transformers import AutoConfig
+from ...models import Models

from ..base import Pipeline
from ..data import Tokenizer

-from .generator import Generator
+from .llm import LLM
from .questions import Questions
-from .sequences import Sequences
from .similarity import Similarity


class Extractor(Pipeline):
"""
Finds answers for a set of queries/questions. The extractor is a combination of a similarity instance (embeddings or similarity pipeline)
to build a question context and a model that answers questions. The model can be either a prompt-driven large language model (LLM),
an extractive question-answering model or a custom pipeline.
Extracts knowledge from content by joining a prompt, context data store and generative model together. The data store can be
an embeddings database or a similarity instance with associated input text. The generative model can be a prompt-driven large
language model (LLM), an extractive question-answering model or a custom pipeline. This is known as prompt-driven search or
retrieval augmented generation (RAG).
"""

# pylint: disable=R0913
@@ -134,27 +134,19 @@ def load(self, path, quantize, gpu, model, task):
            Generator, Sequences, Questions or custom pipeline
        """

-        # If path is not a string, return input
-        if not isinstance(path, str):
+        # Check if path is already a pipeline
+        if isinstance(path, Pipeline):
            return path

-        # Autodetect task if not provided
-        if not task:
-            config = AutoConfig.from_pretrained(path)
-            architecture = config.architectures[0] if config.architectures else None
+        # Attempt to resolve task if not provided
+        task = task if task else Models.task(path)

-            if any(x for x in ["LMHead", "CausalLM"] if x in architecture):
-                task = "language-generation"
-            elif "ConditionalGeneration" in architecture:
-                task = "sequence-sequence"
+        # Load model as Question pipeline
+        if task == "question-answering":
+            return Questions(path, quantize, gpu, model)

-        if task == "language-generation":
-            return Generator(path, quantize, gpu, model)
-        if task == "sequence-sequence":
-            return Sequences(path, quantize, gpu, model)

-        # Default to question-answering
-        return Questions(path, quantize, gpu, model)
+        # Load model as LLM pipeline
+        return LLM(path, quantize, gpu, model, task)

    def query(self, queries, texts):
        """
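A minimal sketch of the refactored load path from the caller's side, assuming an existing embeddings index (the index contents and model checkpoints below are illustrative):

from txtai.embeddings import Embeddings
from txtai.pipeline import Extractor

# Build a tiny embeddings index to act as the context store
embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2", "content": True})
embeddings.index([(0, "Dogs are loyal companions", None)])

# A seq2seq checkpoint resolves through Models.task and loads as an LLM pipeline
extractor = Extractor(embeddings, "google/flan-t5-base")

# A question-answering checkpoint still loads the Questions pipeline
qa = Extractor(embeddings, "distilbert-base-cased-distilled-squad")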
70 changes: 3 additions & 67 deletions src/python/txtai/pipeline/text/generator.py
@@ -2,77 +2,13 @@
Generator module
"""

-from ..hfpipeline import HFPipeline
+from .llm import LLM


-class Generator(HFPipeline):
+class Generator(LLM):
    """
    Generate text with a causal language model.
    """

    def __init__(self, path=None, quantize=False, gpu=True, model=None):
-        super().__init__(self.task(), path, quantize, gpu, model)
-
-    def __call__(self, text, prefix=None, maxlength=512, workers=0, **kwargs):
-        """
-        Generates text using input text
-
-        Args:
-            text: text|list
-            prefix: optional prefix to prepend to text elements
-            maxlength: maximum sequence length
-            workers: number of concurrent workers to use for processing data, defaults to None
-            kwargs: additional generation keyword arguments
-
-        Returns:
-            generated text
-        """
-
-        # List of texts
-        texts = text if isinstance(text, list) else [text]
-
-        # Add prefix, if necessary
-        if prefix:
-            texts = [f"{prefix}{x}" for x in texts]
-
-        # Run pipeline
-        results = self.pipeline(texts, max_length=maxlength, num_workers=workers, **kwargs)
-
-        # Get generated text
-        results = [self.clean(texts[x], result) for x, result in enumerate(results)]
-
-        return results[0] if isinstance(text, str) else results
-
-    def clean(self, prompt, result):
-        """
-        Applies a series of rules to clean generated text.
-
-        Args:
-            prompt: original input prompt
-            result: input result
-
-        Returns:
-            clean text
-        """
-
-        # Extract output from list, if necessary
-        result = result[0] if isinstance(result, list) else result
-
-        # Get generated text field
-        text = result["generated_text"]
-
-        # Replace input prompt
-        text = text.replace(prompt, "")
-
-        # Apply text cleaning rules
-        return text.replace("$=", "<=").strip()
-
-    def task(self):
-        """
-        Get the pipeline task name.
-
-        Returns:
-            pipeline task name
-        """
-
-        return "text-generation"
+        super().__init__(path, quantize, gpu, model, "language-generation")
93 changes: 93 additions & 0 deletions src/python/txtai/pipeline/text/llm.py
@@ -0,0 +1,93 @@
"""
LLM Module
"""

from ...models import Models

from ..hfpipeline import HFPipeline


class LLM(HFPipeline):
"""
Runs prompt text through a large language model (LLM). This pipeline autodetects if the input path is a text generation or
sequence to sequence model.
"""

def __init__(self, path=None, quantize=False, gpu=True, model=None, task=None):
super().__init__(self.task(path, task), path if path else "google/flan-t5-base", quantize, gpu, model)

def __call__(self, text, prefix=None, maxlength=512, workers=0, **kwargs):
"""
Generates text using input text
Args:
text: text|list
prefix: optional prefix to prepend to text elements
maxlength: maximum sequence length
workers: number of concurrent workers to use for processing data, defaults to None
kwargs: additional generation keyword arguments
Returns:
generated text
"""

# List of texts
texts = text if isinstance(text, list) else [text]

# Add prefix, if necessary
if prefix:
texts = [f"{prefix}{x}" for x in texts]

# Run pipeline
results = self.pipeline(texts, max_length=maxlength, num_workers=workers, **kwargs)

# Get generated text
results = [self.clean(texts[x], result) for x, result in enumerate(results)]

return results[0] if isinstance(text, str) else results

def clean(self, prompt, result):
"""
Applies a series of rules to clean generated text.
Args:
prompt: original input prompt
result: input result
Returns:
clean text
"""

# Extract output from list, if necessary
result = result[0] if isinstance(result, list) else result

# Get generated text field
text = result["generated_text"]

# Replace input prompt
text = text.replace(prompt, "")

# Apply text cleaning rules
return text.replace("$=", "<=").strip()

def task(self, path, task):
"""
Get the pipeline task name.
Args:
path: model path input
task: task name
Returns:
pipeline task name
"""

# Mapping from txtai to Hugging Face pipeline tasks
mapping = {"language-generation": "text-generation", "sequence-sequence": "text2text-generation"}

# Attempt to resolve task
if path and not task:
task = Models.task(path)

# Map to Hugging Face task. Default to text2text-generation pipeline when task not resolved.
return mapping.get(task, "text2text-generation")
8 changes: 4 additions & 4 deletions src/python/txtai/pipeline/text/sequences.py
@@ -2,13 +2,13 @@
Sequences module
"""

-from .generator import Generator
+from .llm import LLM


-class Sequences(Generator):
+class Sequences(LLM):
    """
    Runs text through a sequence-sequence model.
    """

-    def task(self):
-        return "text2text-generation"
+    def __init__(self, path=None, quantize=False, gpu=True, model=None):
+        super().__init__(path, quantize, gpu, model, "sequence-sequence")
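With this refactor, Generator and Sequences are thin wrappers that pin the task; the following pair constructs the same underlying pipeline (the t5-small checkpoint is illustrative):

from txtai.pipeline import LLM, Sequences

# Both resolve to a text2text-generation pipeline over the same model
sequences = Sequences("t5-small")
llm = LLM("t5-small", task="sequence-sequence")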
29 changes: 29 additions & 0 deletions test/python/testpipeline/testllm.py
@@ -0,0 +1,29 @@
"""
LLM module tests
"""

import unittest

from transformers import AutoModelForCausalLM, AutoTokenizer

from txtai.pipeline import LLM


class TestLLM(unittest.TestCase):
"""
LLM tests.
"""

def testExternal(self):
"""
Test externally loaded model
"""

model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")

model = LLM((model, tokenizer))
start = "Hello, how are"

# Test that text is generator
self.assertGreater(len(model(start)), len(start))
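Assuming the repository's standard unittest layout, the new test can be run directly (this invocation is an assumption, not part of the commit):

python -m unittest test/python/testpipeline/testllm.py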
