Merge pull request #277 from Eyobyb/feature/number-verification

number validation
Aggregate-Intellect · Feb 7, 2024 · de992e8 · de992e8
2 parents 1547d26 + d5a74c8
commit de992e8
Show file tree

Hide file tree

Showing 17 changed files with 372 additions and 81 deletions.
diff --git a/src/sherpa_ai/agents/qa_agent.py b/src/sherpa_ai/agents/qa_agent.py
@@ -1,15 +1,19 @@
-from typing import List
+from typing import List, Optional
 
 from langchain.base_language import BaseLanguageModel
+from loguru import logger
 
 from sherpa_ai.action_planner import ActionPlanner
 from sherpa_ai.actions import GoogleSearch, SynthesizeOutput
 from sherpa_ai.actions.base import BaseAction
 from sherpa_ai.agents.base import BaseAgent
 from sherpa_ai.config import AgentConfig
+from sherpa_ai.events import EventType
 from sherpa_ai.memory import Belief
 from sherpa_ai.memory.shared_memory import SharedMemory
 from sherpa_ai.output_parsers.citation_validation import CitationValidation
+from sherpa_ai.output_parsers.number_validation import NumberValidation
+from sherpa_ai.output_parsers.validation_result import ValidationResult
 from sherpa_ai.verbose_loggers.verbose_loggers import DummyVerboseLogger
 
 # TODO: QA Agent only contains partial implementation from the original
@@ -33,11 +37,13 @@ def __init__(
         name: str = "QA Agent",
         description: str = TASK_AGENT_DESRIPTION,
         shared_memory: SharedMemory = None,
-        belief: Belief = Belief(),
+        belief: Optional[Belief] = None,
         agent_config: AgentConfig = AgentConfig(),
         num_runs: int = 3,
         verbose_logger=DummyVerboseLogger(),
         require_meta=False,
+        perform_number_validation=False,
+        validation_count: int = 3,
         citation_thresh=[
             0.65,
             0.65,
@@ -66,14 +72,19 @@ def __init__(
         self.name = name
         self.description = description + "\n\n" + f"Your name is {name}."
         self.shared_memory = shared_memory
-        self.belief = belief
         self.num_runs = num_runs
         self.llm = llm
         self.action_planner = ActionPlanner(description, ACTION_PLAN_DESCRIPTION, llm)
         self.verbose_logger = verbose_logger
         self.require_meta = require_meta
         self.citation_thresh = citation_thresh
         self.config = agent_config
+        self.validation_count = validation_count
+        self.perform_number_validation = perform_number_validation
+
+        if belief is None:
+            belief = Belief()
+        self.belief = belief
 
     def create_actions(self) -> List[BaseAction]:
         return [
@@ -96,11 +107,64 @@ def synthesize_output(self) -> str:
             self.belief.get_internal_history(self.llm.get_num_tokens),
         )
 
+        self.belief.update_internal(EventType.result, self.name, result)
+
+        number_validation = self.num_validation(
+            result=result, synthesize_action=synthesize_action
+        )
+        return number_validation
+
+    def num_validation(self, result, synthesize_action) -> str:
+        """
+        Validate a synthesized answer and regenerate it while validation fails.
+
+        The answer is passed to ``process_output``. When it is rejected, the
+        validator's feedback is stored in the belief as a "critic" feedback
+        event and ``synthesize_action`` is executed again with a history that
+        excludes earlier results. At most ``self.validation_count``
+        regenerations are attempted.
+
+        Args:
+            result: The answer produced by the first synthesis run.
+            synthesize_action: Action whose ``execute`` regenerates the answer.
+
+        Returns:
+            str: The accepted answer as returned by the validator, or the last
+            regenerated answer followed by a reliability warning when every
+            attempt was rejected. The input is returned unchanged when neither
+            number validation nor citation metadata is enabled.
+        """
+        if not self.perform_number_validation and not self.require_meta:
+            return result
+
+        count = 0
+        while count < self.validation_count:
+            validation_result = self.process_output(result)
+
+            # The loop condition already bounds `count`, so validity is the
+            # only reason to stop early.
+            if validation_result.is_valid:
+                result = validation_result.result
+                break
+
+            count += 1
+            self.belief.update_internal(
+                EventType.feedback, "critic", validation_result.feedback
+            )
+
+            result = synthesize_action.execute(
+                self.belief.current_task.content,
+                self.belief.get_context(self.llm.get_num_tokens),
+                self.belief.get_histories_excluding_types(
+                    token_counter=self.llm.get_num_tokens,
+                    exclude_type=[EventType.result],
+                ),
+            )
+
+            # update intermediate belief for round
+            self.belief.update_internal(EventType.result, self.name, result)
+            if count == self.validation_count:
+                # Attempts are exhausted: this last regeneration is not
+                # validated again, so flag it for the reader. The blank line
+                # keeps the warning from running into the answer text.
+                result = (
+                    result
+                    + "\n\n"
+                    + "The numeric value results might not be fully reliable. Exercise caution and consider alternative sources if possible."
+                )
+
+        self.belief.update_internal(EventType.result, self.name, result)
+        return result
+
+    def process_output(self, generated: str) -> ValidationResult:
+        """
+        Run the enabled output checks on a generated answer.
+
+        Number validation (when ``perform_number_validation`` is set) checks
+        the text against the internal history with feedback and result events
+        excluded. Citations are added afterwards when ``require_meta`` is set.
+
+        Args:
+            generated: The text produced by the synthesis action.
+
+        Returns:
+            ValidationResult: The failed number validation if there is one,
+            otherwise the citation result, otherwise an accepting result that
+            carries ``generated`` unchanged.
+        """
+        # Default to accepting the text unchanged so `result` is always bound,
+        # even when both checks are disabled.
+        result = ValidationResult(is_valid=True, result=generated, feedback="")
+
+        if self.perform_number_validation:
+            internal_history = self.belief.get_histories_excluding_types(
+                token_counter=self.llm.get_num_tokens,
+                exclude_type=[EventType.feedback, EventType.result],
+            )
+            num_val = NumberValidation(internal_history)
+            result = num_val.process_output(generated)
+            if not result.is_valid:
+                # Return the failure now: the citation step below always
+                # reports success and would otherwise overwrite it.
+                return result
+
         if self.require_meta:
-            result = self.add_citation(result)
+            result = self.add_citation(generated)
         return result
 
-    def add_citation(self, text) -> str:
+    def add_citation(self, text) -> ValidationResult:
         google = None
         for action in self.belief.actions:
             if isinstance(action, GoogleSearch):
@@ -110,12 +174,14 @@ def add_citation(self, text) -> str:
             self.citation_thresh[0], self.citation_thresh[1], self.citation_thresh[2]
         )
 
+        result = ValidationResult(
+            is_valid=True,
+            result=text,
+            feedback="",
+        )
         # only do citation validation if search was used
-        if len(google.meta) == 0:
-            return text
-
-        resource = google.meta[-1]
-
-        result = citation_module.parse_output(text, resource)
+        if len(google.meta) > 0:
+            resource = google.meta[-1]
+            result = citation_module.parse_output(text, resource)
 
         return result
diff --git a/src/sherpa_ai/memory/belief.py b/src/sherpa_ai/memory/belief.py
@@ -90,6 +90,36 @@ def get_internal_history(
 
         return context
 
+    def get_histories_excluding_types(
+        self,
+        token_counter: Callable[[str], int],
+        exclude_type: List[EventType],
+        max_tokens=4000,
+    ):
+        """
+        Get the internal history of the agent without the excluded event types.
+
+        Events are scanned from newest to oldest and collection stops once the
+        collected content exceeds ``max_tokens``. Feedback events are gathered
+        separately and placed after all other events. Duplicate contents are
+        dropped and each section is returned in chronological order.
+
+        Args:
+            token_counter: Token counter
+            exclude_type: List of event types to be excluded
+            max_tokens: Maximum number of tokens
+
+        Returns:
+            str: Internal history of the agent
+        """
+        results = []
+        feedback = []
+        current_tokens = 0
+        for event in reversed(self.internal_events):
+            if event.event_type in exclude_type:
+                # Excluded events are not returned, so they must not use up
+                # the token budget either.
+                continue
+            if event.event_type == EventType.feedback:
+                feedback.append(event.content)
+            else:
+                results.append(event.content)
+            current_tokens += token_counter(event.content)
+            if current_tokens > max_tokens:
+                break
+
+        # Both lists were collected newest-first. Restore chronological order
+        # and de-duplicate with dict.fromkeys, which keeps insertion order, so
+        # the generated context is the same on every run.
+        sections = [
+            "\n".join(dict.fromkeys(reversed(entries)))
+            for entries in (results, feedback)
+            if entries
+        ]
+        # Separate the two sections so the first feedback line does not run
+        # into the last history line.
+        return "\n".join(sections)
+
+
     def set_actions(self, actions: List[BaseAction]):
         self.actions = actions
 

diff --git a/src/sherpa_ai/output_parsers/base.py b/src/sherpa_ai/output_parsers/base.py
@@ -1,7 +1,17 @@
 from abc import ABC, abstractmethod
+from typing import Tuple
 
 
 class BaseOutputParser(ABC):
+    """Abstract interface for parsers that take generated text and return a string."""
+
     @abstractmethod
     def parse_output(self, text: str) -> str:
+        """Parse ``text`` and return the parsed output; implemented by subclasses."""
         pass
+
+
+class BaseOutputProcessor(ABC):
+    """
+    Abstract interface for objects that process generated text.
+
+    Instances are callable; calling one forwards to ``process_output``.
+    """
+
+    @abstractmethod
+    def process_output(self, text: str) -> Tuple[bool, str]:
+        """
+        Process ``text`` and report the outcome; implemented by subclasses.
+
+        NOTE(review): the annotation promises a ``(bool, str)`` tuple, but the
+        ``NumberValidation`` subclass returns a ``ValidationResult`` - confirm
+        which contract is intended and align the annotation.
+        """
+        pass
+
+    def __call__(self, text: str) -> Tuple[bool, str]:
+        """Forward to ``process_output`` so the processor can be called directly."""
+        return self.process_output(text)
diff --git a/src/sherpa_ai/output_parsers/citation_validation.py b/src/sherpa_ai/output_parsers/citation_validation.py
@@ -2,6 +2,7 @@
 from nltk.tokenize import sent_tokenize, word_tokenize
 
 from sherpa_ai.output_parsers.base import BaseOutputParser
+from sherpa_ai.output_parsers.validation_result import ValidationResult
 
 nltk.download("punkt")
 
@@ -72,10 +73,8 @@ def split_paragraph_into_sentences(self, paragraph):
         return sentences
 
     # add citation to the generated text
-    def parse_output(
-        self, generated: str, resources: list[dict()], activated=True
-    ) -> str:
-        """
+    def parse_output(self, generated: str, resources: list[dict]) -> ValidationResult:
+        """ 
         Add citation to each sentence in the generated text from resources based on fact checking model.
         Args:
             generated (str): The generated content where we need to add citation/reference
@@ -87,9 +86,8 @@ def parse_output(
             str: A formatted string combining the citation information from the 'resources' list.
         """
 
-        if not activated:
-            return generated
-
+        # resources type
+        # resources = [{"Document":, "Source":...}, {}]
         paragraph = generated.split("\n")
         paragraph = [p for p in paragraph if len(p.strip()) > 0]
 
@@ -147,4 +145,9 @@ def parse_output(
                     new_sentence.append(sentence)
 
             new_paragraph.append(" ".join(new_sentence) + "\n")
-        return "".join(new_paragraph)
+
+        return ValidationResult(
+            is_valid=True,
+            result="".join(new_paragraph),
+            feedback="",
+        )
diff --git a/src/sherpa_ai/output_parsers/number_validation.py b/src/sherpa_ai/output_parsers/number_validation.py
@@ -0,0 +1,29 @@
+from typing import Tuple
+
+from sherpa_ai.memory import Belief
+from sherpa_ai.output_parsers.base import BaseOutputProcessor
+from sherpa_ai.output_parsers.validation_result import ValidationResult
+from sherpa_ai.utils import check_if_number_exist
+
+
+class NumberValidation(BaseOutputProcessor):
+    """Output processor that checks generated text against a source text via ``check_if_number_exist``."""
+
+    def __init__(
+        self,
+        source: str,
+    ):
+        # text the generated output is checked against
+        self.source = source
+
+    def process_output(self, text: str) -> ValidationResult:
+        """
+        Check ``text`` against the stored source.
+
+        Args:
+            text: The generated text to check.
+
+        Returns:
+            ValidationResult: Always carries ``text`` unchanged. It is valid
+            with empty feedback when the check reports ``number_exists``;
+            otherwise it is invalid and the feedback is the check's message.
+        """
+        outcome = check_if_number_exist(text, self.source)
+
+        # Handle the rejection first; the accepting result is the fall-through.
+        if not outcome["number_exists"]:
+            return ValidationResult(
+                is_valid=False,
+                result=text,
+                feedback=outcome["messages"],
+            )
+
+        return ValidationResult(is_valid=True, result=text, feedback="")
diff --git a/src/sherpa_ai/output_parsers/validation_result.py b/src/sherpa_ai/output_parsers/validation_result.py
@@ -0,0 +1,7 @@
+from pydantic import BaseModel
+
+
+class ValidationResult(BaseModel):
+    """Outcome of running a validator or output processor on generated text."""
+
+    # whether the text passed the check
+    is_valid: bool
+    # the text handed back by the check (the input text or a rewritten version)
+    result: str
+    # message explaining a failed check; defaults to an empty string
+    feedback: str = ""
diff --git a/src/sherpa_ai/utils.py b/src/sherpa_ai/utils.py
@@ -6,8 +6,7 @@
 import tiktoken
 from bs4 import BeautifulSoup
 from langchain.docstore.document import Document
-from langchain.document_loaders import (UnstructuredMarkdownLoader,
-                                        UnstructuredPDFLoader)
+from langchain.document_loaders import UnstructuredMarkdownLoader, UnstructuredPDFLoader
 from langchain.llms import OpenAI
 from langchain.text_splitter import TokenTextSplitter
 from loguru import logger
@@ -323,6 +322,6 @@ def check_if_number_exist(result: str, source: str):
     if len(error_numbers) > 0:
         for numbers in error_numbers:
             message += numbers + ", "
-        message = f"Disregard the numbers {message} as they are not relevant to the context provided and should not be taken into consideration."
-        return {"number_exists": False, "messages": message}
-    return {"number_exists": True, "messages": message}
+        message = f"Don't use the numbers {message} to answer the question instead stick to the numbers mentioned in the context."
+        return {"number_exists": False , "messages":message}
+    return {"number_exists": True , "messages":message}
diff --git a/src/tests/data/test_number_citation_validator_test_number_citation_succeeds_in_qa_0.jsonl b/src/tests/data/test_number_citation_validator_test_number_citation_succeeds_in_qa_0.jsonl
@@ -0,0 +1,4 @@
+{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nGiven your specialized expertise, historical context, and your mission to facilitate Machine-Learning-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task.\\n\\n**Task Description**: What is the annual salary for an entry level software engineer in Canada?\\n\\n**Possible Actions**:\\n{\\n    \"name\": \"Google Search\",\\n    \"args\": {\\n        \"query\": \"string\"\\n    }\\n}\\n\\n**Task Context**:\\nWhat is the annual salary for an entry level software engineer in Canada?\\n\\n\\n**History of Previous Actions**:\\n\\n\\nYou should only respond in JSON format as described below without any extra text.\\nResponse Format:\\n{\\n    \"command\": {\\n        \"name\": \"tool/command name you choose\",\\n        \"args\": {\\n            \"arg name\": \"value\"\\n        }\\n    }\\n}\\nEnsure the response can be parsed by Python json.loads\\n\\nIf you believe the task is complete and no further actions are necessary, respond with \"Finished\".\\n\\nFollow the described fromat strictly.\\n\\n", "agent": "human"}], "output": "{\\n    \"command\": {\\n        \"name\": \"Google Search\",\\n        \"args\": {\\n            \"query\": \"annual salary for entry level software engineer in Canada\"\\n        }\\n    }\\n}", "llm_name": "gpt-3.5-turbo"}
+{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: What is the annual salary for an entry level software engineer in Canada?\\n\\n\\nAction - Result History:\\nGoogle Search{'query': 'annual salary for entry level software engineer in Canada'}\\nA software engineer is a person who applies the engineering design process to design, develop, test, maintain, and evaluate computer software.\\n        The term programmer is sometimes used as a synonym, but may emphasize software implementation over design and can also lack connotations of engineering education or skills.\\n        the average annual  in Canada is around $9000 to $1,000,170,000 CAD for software engineers\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: What is the annual salary for an entry level software engineer in Canada?\\nResult:\\n", "agent": "human"}], "output": "The annual salary for an entry-level software engineer in Canada can vary depending on factors such as location, company size, and industry. However, according to the information provided in the action-result history, the average annual salary for software engineers in Canada is around $9,000 to $1,000,170,000 CAD.\\n\\nPlease note that the salary range mentioned in the history seems to be quite broad and may not accurately represent the typical salary for an entry-level software engineer. 
It is recommended to conduct further research or refer to reliable sources such as job search websites, salary surveys, or industry reports to get a more precise and up-to-date understanding of the salary range for entry-level software engineers in Canada.\\n\\nSources:\\n- [Payscale - Software Engineer Salary in Canada](https://www.payscale.com/research/CA/Job=Software_Engineer/Salary)\\n- [Glassdoor - Software Engineer Salaries in Canada](https://www.glassdoor.ca/Salaries/canada-software-engineer-salary-SRCH_IL.0,6_IN3_KO7,24.htm)", "llm_name": "gpt-3.5-turbo"}
+{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: What is the annual salary for an entry level software engineer in Canada?\\n\\n\\nAction - Result History:\\nA software engineer is a person who applies the engineering design process to design, develop, test, maintain, and evaluate computer software.\\n        The term programmer is sometimes used as a synonym, but may emphasize software implementation over design and can also lack connotations of engineering education or skills.\\n        the average annual  in Canada is around $9000 to $1,000,170,000 CAD for software engineers\\nGoogle Search{'query': 'annual salary for entry level software engineer in Canada'}Don't use the numbers 3, 06, 724,  to answer the question instead stick to the numbers mentioned in the context.\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: What is the annual salary for an entry level software engineer in Canada?\\nResult:\\n", "agent": "human"}], "output": "The annual salary for an entry level software engineer in Canada is around $90,000 to $100,000 CAD. This information is based on the context provided and the average salary range mentioned. Please note that actual salaries may vary depending on factors such as location, company, and individual qualifications.\\n\\nSources:\\n- Context: No specific source mentioned\\n- Action-Result History: No specific source mentioned", "llm_name": "gpt-3.5-turbo"}
+{"input": [{"text": "You are a **question answering assistant** who solves user questions and offers a detailed solution.\\n\\nYour name is QA Agent.\\n\\nContext: What is the annual salary for an entry level software engineer in Canada?\\n\\n\\nAction - Result History:\\nA software engineer is a person who applies the engineering design process to design, develop, test, maintain, and evaluate computer software.\\n        The term programmer is sometimes used as a synonym, but may emphasize software implementation over design and can also lack connotations of engineering education or skills.\\n        the average annual  in Canada is around $9000 to $1,000,170,000 CAD for software engineers\\nGoogle Search{'query': 'annual salary for entry level software engineer in Canada'}Don't use the numbers 3, 06, 724,  to answer the question instead stick to the numbers mentioned in the context.\\nDon't use the numbers 100000, 90000,  to answer the question instead stick to the numbers mentioned in the context.\\n\\nGiven the context and the action-result history, please complete the task mentioned. Include any links you used from the context and history in the result.\\nTask: What is the annual salary for an entry level software engineer in Canada?\\nResult:\\n", "agent": "human"}], "output": "The annual salary for an entry level software engineer in Canada can vary depending on factors such as location, company size, and industry. However, based on the information provided in the context, the average annual salary for software engineers in Canada is around $9,000 to $1,000,170,000 CAD. It is important to note that these numbers are approximate and may not reflect the exact salary range for entry level positions.\\n\\nPlease keep in mind that the numbers mentioned in the context are quite broad and cover a wide range of salaries. 
It is recommended to research specific companies and industries to get a more accurate understanding of the salary range for entry level software engineers in Canada.\\n\\nSources:\\n- No specific sources were mentioned in the context or action-result history.", "llm_name": "gpt-3.5-turbo"}