From 083a058b2774a106d325a0151f657b47a5251190 Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Tue, 1 Oct 2024 16:45:04 -0700
Subject: [PATCH 1/4] update evals

---
 optillm.py                       |  4 +-
 optillm/rto.py                   |  2 +-
 optillm/z3_solver.py             | 72 +++++++++++++++++++-------------
 scripts/eval_frames_benchmark.py |  4 +-
 scripts/gen_optillm_dataset.py   |  7 ++--
 5 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/optillm.py b/optillm.py
index de767ac..810c039 100644
--- a/optillm.py
+++ b/optillm.py
@@ -22,7 +22,7 @@
 from optillm.rto import round_trip_optimization
 from optillm.self_consistency import advanced_self_consistency_approach
 from optillm.pvg import inference_time_pv_game
-from optillm.z3_solver import Z3SolverSystem
+from optillm.z3_solver import Z3SymPySolverSystem
 from optillm.rstar import RStar
 from optillm.cot_reflection import cot_reflection
 from optillm.plansearch import plansearch
@@ -147,7 +147,7 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
         elif approach == 'rto':
             return round_trip_optimization(system_prompt, initial_query, client, model)
         elif approach == 'z3':
-            z3_solver = Z3SolverSystem(system_prompt, client, model)
+            z3_solver = Z3SymPySolverSystem(system_prompt, client, model)
             return z3_solver.process_query(initial_query)
         elif approach == "self_consistency":
             return advanced_self_consistency_approach(system_prompt, initial_query, client, model)
diff --git a/optillm/rto.py b/optillm/rto.py
index 4c62185..61e70e5 100644
--- a/optillm/rto.py
+++ b/optillm/rto.py
@@ -59,7 +59,7 @@ def round_trip_optimization(system_prompt: str, initial_query: str, client, mode
     c2 = extract_code_from_prompt(c2)
 
     if c1.strip() == c2.strip():
-        return c1
+        return c1, rto_completion_tokens
 
     messages = [{"role": "system", "content": system_prompt},
                 {"role": "user", "content": f"Initial query: {initial_query}\n\nFirst generated code (C1):\n{c1}\n\nSecond generated code (C2):\n{c2}\n\nBased on the initial query and these two different code implementations, generate a final, optimized version of the code. Only respond with the final code, do not return anything else."}]
diff --git a/optillm/z3_solver.py b/optillm/z3_solver.py
index b24e38f..976ec92 100644
--- a/optillm/z3_solver.py
+++ b/optillm/z3_solver.py
@@ -1,5 +1,6 @@
 from typing import Dict, Any
 from z3 import *
+import sympy
 import io
 import re
 import contextlib
@@ -52,6 +53,7 @@ def prepare_safe_globals():
 
 def execute_code_in_process(code: str):
     import z3
+    import sympy
     import math
     import itertools
     from fractions import Fraction
@@ -62,9 +64,14 @@ def execute_code_in_process(code: str):
     z3_whitelist = set(dir(z3))
     safe_globals.update({name: getattr(z3, name) for name in z3_whitelist})
 
-    # Ensure key Z3 components are available
+    # Add SymPy specific functions
+    sympy_whitelist = set(dir(sympy))
+    safe_globals.update({name: getattr(sympy, name) for name in sympy_whitelist})
+
+    # Ensure key Z3 and SymPy components are available
     safe_globals.update({
         'z3': z3,
+        'sympy': sympy,
         'Solver': z3.Solver,
         'solver': z3.Solver,
         'Optimize': z3.Optimize,
@@ -83,6 +90,15 @@ def execute_code_in_process(code: str):
         'ForAll': z3.ForAll,
         'Exists': z3.Exists,
         'model': z3.Model,
+        'Symbol': sympy.Symbol,
+        'solve': sympy.solve,
+        'simplify': sympy.simplify,
+        'expand': sympy.expand,
+        'factor': sympy.factor,
+        'diff': sympy.diff,
+        'integrate': sympy.integrate,
+        'limit': sympy.limit,
+        'series': sympy.series,
     })
     
     # Add custom functions
@@ -114,41 +130,38 @@ def Rational(numerator, denominator=1):
             return ("error", traceback.format_exc())
     return ("success", output_buffer.getvalue())
 
-class Z3SolverSystem:
+class Z3SymPySolverSystem:
     def __init__(self, system_prompt: str, client, model: str, timeout: int = 30):
         self.system_prompt = system_prompt
         self.model = model
         self.client = client
         self.timeout = timeout
-        self.z3_completion_tokens = 0
+        self.solver_completion_tokens = 0
         logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
     def process_query(self, query: str) -> str:
         try:
             analysis = self.analyze_query(query)
-            # print("Analysis: "+ analysis)
             if "SOLVER_CAN_BE_APPLIED: True" not in analysis:
-                return self.standard_llm_inference(query) , self.z3_completion_tokens
+                return self.standard_llm_inference(query), self.solver_completion_tokens
             
             formulation = self.extract_and_validate_expressions(analysis)
-            # print("Formulation: "+ formulation)
-            solver_result = self.solve_with_z3(formulation)
-            # print(solver_result)
+            solver_result = self.solve_with_z3_sympy(formulation)
              
-            return self.generate_response(query, analysis, solver_result), self.z3_completion_tokens
+            return self.generate_response(query, analysis, solver_result), self.solver_completion_tokens
         except Exception as e:
-            logging.error(f"An error occurred while processing the query with Z3, returning standard llm inference results: {str(e)}")
-            return self.standard_llm_inference(query), self.z3_completion_tokens
+            logging.error(f"An error occurred while processing the query with Z3 and SymPy, returning standard llm inference results: {str(e)}")
+            return self.standard_llm_inference(query), self.solver_completion_tokens
 
     def analyze_query(self, query: str) -> str:
-        analysis_prompt = f"""Analyze the given query and determine if it can be solved using Z3:
+        analysis_prompt = f"""Analyze the given query and determine if it can be solved using Z3 or SymPy:
 
 1. Identify variables, constraints, and objectives.
-2. Determine the problem type (e.g., SAT, optimization).
-3. Decide if Z3 is suitable.
+2. Determine the problem type (e.g., SAT, optimization, symbolic manipulation).
+3. Decide if Z3, SymPy, or a combination of both is suitable.
 
-If Z3 can be applied, provide Python code using Z3 to solve the problem. Make sure you define any additional methods you need for solving the problem.
-The code will be executed in an environment with only Z3 available, so do not include any other libraries or modules.
+If Z3 or SymPy can be applied, provide Python code using the appropriate library (or both) to solve the problem. Make sure you define any additional methods you need for solving the problem.
+The code will be executed in an environment with Z3 and SymPy available, so do not include any other libraries or modules.
 
 Query: {query}
 
@@ -157,7 +170,7 @@ def analyze_query(self, query: str) -> str:
 
 SOLVER_FORMULATION:
 ```python
-# Z3 code here
+# Z3 and/or SymPy code here
 ```
 
 Analysis:
@@ -174,7 +187,7 @@ def analyze_query(self, query: str) -> str:
             n=1,
             temperature=0.1
         )
-        self.z3_completion_tokens  = analysis_response.usage.completion_tokens
+        self.solver_completion_tokens = analysis_response.usage.completion_tokens
         return analysis_response.choices[0].message.content
 
     def generate_response(self, query: str, analysis: str, solver_result: Dict[str, Any]) -> str:
@@ -202,7 +215,7 @@ def generate_response(self, query: str, analysis: str, solver_result: Dict[str,
             n=1,
             temperature=0.1
         )
-        self.z3_completion_tokens  = response.usage.completion_tokens
+        self.solver_completion_tokens = response.usage.completion_tokens
         return response.choices[0].message.content
 
     def standard_llm_inference(self, query: str) -> str:
@@ -216,27 +229,27 @@ def standard_llm_inference(self, query: str) -> str:
             n=1,
             temperature=0.1
         )
-        self.z3_completion_tokens  = response.usage.completion_tokens
+        self.solver_completion_tokens = response.usage.completion_tokens
         return response.choices[0].message.content
 
     def extract_and_validate_expressions(self, analysis: str) -> str:
         formulation = re.search(r"```python\n([\s\S]+?)```", analysis)
         if formulation:
             return formulation.group(1).strip()
-        raise ValueError("No valid Z3 formulation found in the analysis.")
+        raise ValueError("No valid Z3 or SymPy formulation found in the analysis.")
 
-    def solve_with_z3(self, formulation: str, max_attempts: int = 3) -> Dict[str, Any]:
+    def solve_with_z3_sympy(self, formulation: str, max_attempts: int = 3) -> Dict[str, Any]:
         for attempt in range(max_attempts):
             output = self.execute_solver_code(formulation)
             if "Error:" not in output:
                 return {"status": "success", "output": output}
         
-            error_prompt = f"""Fix the Z3 code that resulted in an error. Follow these steps:
+            error_prompt = f"""Fix the Z3 or SymPy code that resulted in an error. Follow these steps:
 
     1. Review the original code and the error message carefully.
     2. Analyze the error and identify its root cause.
     3. Think through the necessary changes to fix the error.
-    4. Generate a corrected version of the Z3 code.
+    4. Generate a corrected version of the code.
 
     Original Code:
     {formulation}
@@ -247,9 +260,10 @@ def solve_with_z3(self, formulation: str, max_attempts: int = 3) -> Dict[str, An
     Step-by-Step Analysis:
     [Provide your step-by-step analysis here]
 
-    Corrected Z3 Code:
+    Corrected Z3 or SymPy Code:
     ```python
-    # Corrected Z3 code here
+    # Corrected code here
+    ```
     """
             response = self.client.chat.completions.create(
                 model=self.model,
@@ -261,13 +275,13 @@ def solve_with_z3(self, formulation: str, max_attempts: int = 3) -> Dict[str, An
                 n=1,
                 temperature=0.1
             )
-            self.z3_completion_tokens  = response.usage.completion_tokens
+            self.solver_completion_tokens = response.usage.completion_tokens
             formulation = self.extract_and_validate_expressions(response.choices[0].message.content)
 
         return {"status": "failed", "output": "Failed to solve after multiple attempts."}
 
     def execute_solver_code(self, code: str) -> str:
-        logging.info("Executing Z3 solver code")
+        logging.info("Executing Z3 and SymPy solver code")
         logging.info(f"Code: {code}")
         
         # Parse the code into an AST
@@ -292,5 +306,5 @@ def execute_solver_code(self, code: str) -> str:
             logging.error(f"Execution error: {result}")
             return f"Error: {result}"
 
-        logging.info("Z3 solver code executed successfully")
+        logging.info("Z3 and SymPy solver code executed successfully")
         return result
\ No newline at end of file
diff --git a/scripts/eval_frames_benchmark.py b/scripts/eval_frames_benchmark.py
index 16c44c0..4a5e684 100644
--- a/scripts/eval_frames_benchmark.py
+++ b/scripts/eval_frames_benchmark.py
@@ -137,9 +137,9 @@ def main(model: str):
     print(f"Accuracy: {accuracy:.2%}")
     
     # Print accuracy by reasoning type
-    reasoning_types = set(r['reasoning_types'] for r in results)
+    reasoning_types = set(r['reasoning_type'] for r in results)
     for rt in reasoning_types:
-        rt_samples = [r for r in results if r['reasoning_types'] == rt]
+        rt_samples = [r for r in results if r['reasoning_type'] == rt]
         rt_correct = sum(1 for r in rt_samples if r['evaluation_decision'] == 'TRUE')
         rt_accuracy = rt_correct / len(rt_samples)
         print(f"Accuracy for {rt}: {rt_accuracy:.2%}")
diff --git a/scripts/gen_optillm_dataset.py b/scripts/gen_optillm_dataset.py
index 9a3b3cd..976ae1b 100644
--- a/scripts/gen_optillm_dataset.py
+++ b/scripts/gen_optillm_dataset.py
@@ -26,7 +26,7 @@ async def generate_response(prompt: str, approach: str) -> Dict[str, Any]:
         }
     else:
         # Use OptILM with the specified approach
-        client = AsyncOpenAI(api_key="none", base_url="http://localhost:8000/v1")
+        client = AsyncOpenAI(api_key="none", base_url="http://localhost:8080/v1")
         response = await client.chat.completions.create(
             model=f"{approach}-gpt-4o-mini",  # Assuming OptILM uses this naming convention
             messages=[{"role": "user", "content": prompt}],
@@ -48,7 +48,7 @@ async def rank_responses(prompt: str, responses: List[Dict[str, Any]]) -> List[i
     )
     
     ranking_str = ranking_response.choices[0].message.content.strip()
-    print(ranking_str)
+    print(f"Ranking str: {ranking_str}")
     return [int(idx) for idx in ranking_str.split(",")]
 
 async def process_sample(sample: Dict[str, Any]) -> Dict[str, Any]:
@@ -66,6 +66,7 @@ async def process_sample(sample: Dict[str, Any]) -> Dict[str, Any]:
     rankings = await rank_responses(prompt, results)
 
     # Add rankings to results
+    print(rankings)
     for rank, idx in enumerate(rankings):
         results[idx]["rank"] = rank
 
@@ -79,7 +80,7 @@ async def generate_dataset(num_samples: int, output_file: str):
     dataset = load_dataset("lmsys/arena-hard-auto-v0.1", split="train")
     
     with open(output_file, "w") as f:
-        for sample in tqdm(dataset.select(range(num_samples)), total=num_samples):
+        for sample in tqdm(dataset.select(range(29, 29 + num_samples)), total=num_samples):
             result = await process_sample(sample)
             f.write(json.dumps(result) + "\n")
 

From bb7ae69fed67b861a1d48bb412d3480612e81b5b Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Tue, 1 Oct 2024 22:53:18 -0700
Subject: [PATCH 2/4] updates

handle safety settings for gemini models
fix handling of error response in memory plugin
---
 litellm_wrapper.py               | 12 +++++++++++-
 optillm/plugins/memory_plugin.py | 28 +++++++++++++++++-----------
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/litellm_wrapper.py b/litellm_wrapper.py
index 89d7b24..0c75324 100644
--- a/litellm_wrapper.py
+++ b/litellm_wrapper.py
@@ -3,6 +3,16 @@
 from litellm import completion
 from typing import List, Dict, Any, Optional
 
+SAFETY_SETTINGS = [
+    {"category": cat, "threshold": "BLOCK_NONE"}
+    for cat in [
+        "HARM_CATEGORY_HARASSMENT",
+        "HARM_CATEGORY_HATE_SPEECH",
+        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+        "HARM_CATEGORY_DANGEROUS_CONTENT"
+    ]
+]
+
 class LiteLLMWrapper:
     def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
         self.api_key = api_key
@@ -14,7 +24,7 @@ class Chat:
         class Completions:
             @staticmethod
             def create(model: str, messages: List[Dict[str, str]], **kwargs):
-                response = completion(model=model, messages=messages, **kwargs)
+                response = completion(model=model, messages=messages, **kwargs, safety_settings=SAFETY_SETTINGS)
                 # Convert LiteLLM response to match OpenAI response structure
                 return response
 
diff --git a/optillm/plugins/memory_plugin.py b/optillm/plugins/memory_plugin.py
index 00d2bad..835a084 100644
--- a/optillm/plugins/memory_plugin.py
+++ b/optillm/plugins/memory_plugin.py
@@ -50,19 +50,23 @@ def extract_query(text: str) -> Tuple[str, str]:
     return query, context
 
 def extract_key_information(text: str, client, model: str) -> List[str]:
+    # print(f"Prompt : {text}")
     prompt = f"""Extract key information from the following text. Provide a list of important facts or concepts, each on a new line:
 
 {text}
 
 Key information:"""
 
-    response = client.chat.completions.create(
-        model=model,
-        messages=[{"role": "user", "content": prompt}],
-        max_tokens=1000
-    )
-
-    key_info = response.choices[0].message.content.strip().split('\n')
+    try: 
+        response = client.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=1000
+        )
+        key_info = response.choices[0].message.content.strip().split('\n')
+    except Exception as e:
+        print(f"Error parsing content: {str(e)}")
+        return [],0
     
     return [info.strip('- ') for info in key_info if info.strip()], response.usage.completion_tokens
 
@@ -75,14 +79,16 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str
     chunk_size = 10000
     for i in range(0, len(context), chunk_size):
         chunk = context[i:i+chunk_size]
+        # print(f"chunk: {chunk}")
         key_info, tokens = extract_key_information(chunk, client, model)
+        #print(f"key info: {key_info}")
         completion_tokens += tokens
         for info in key_info:
             memory.add(info)
-
+    # print(f"query : {query}")
     # Retrieve relevant information from memory
     relevant_info = memory.get_relevant(query)
-    
+    # print(f"relevant_info : {relevant_info}")
     # Generate response using relevant information
     prompt = f"""System: {system_prompt}
 
@@ -96,8 +102,8 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str
         messages=[{"role": "user", "content": prompt}],
         max_tokens=1000
     )
-
+    print(f"response : {response}")
     final_response = response.choices[0].message.content.strip()
     completion_tokens += response.usage.completion_tokens
-
+    print(f"final_response: {final_response}")
     return final_response, completion_tokens
\ No newline at end of file

From 4169321c719485e61b9585eb235fe9c467429ece Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Wed, 2 Oct 2024 02:52:45 -0700
Subject: [PATCH 3/4] add privacy plugin

---
 litellm_wrapper.py                |   4 +-
 optillm.py                        |  76 ++++++++++-------
 optillm/plugins/privacy_plugin.py | 135 ++++++++++++++++++++++++++++++
 requirements.txt                  |   4 +-
 setup.py                          |   2 +
 5 files changed, 188 insertions(+), 33 deletions(-)
 create mode 100644 optillm/plugins/privacy_plugin.py

diff --git a/litellm_wrapper.py b/litellm_wrapper.py
index 0c75324..9d19b09 100644
--- a/litellm_wrapper.py
+++ b/litellm_wrapper.py
@@ -38,8 +38,8 @@ def list():
             # This list can be expanded as needed.
             return {
                 "data": [
-                    {"id": "gpt-3.5-turbo"},
-                    {"id": "gpt-4"},
+                    {"id": "gpt-4o-mini"},
+                    {"id": "gpt-4o"},
                     {"id": "command-nightly"},
                     # Add more models as needed
                 ]
diff --git a/optillm.py b/optillm.py
index 810c039..4db69f0 100644
--- a/optillm.py
+++ b/optillm.py
@@ -36,31 +36,34 @@
 # Initialize Flask app
 app = Flask(__name__)
 
-# OpenAI, Azure, or LiteLLM API configuration
-if os.environ.get("OPENAI_API_KEY"):
-    API_KEY = os.environ.get("OPENAI_API_KEY")
-    default_client = OpenAI(api_key=API_KEY)
-elif os.environ.get("AZURE_OPENAI_API_KEY"):
-    API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
-    API_VERSION = os.environ.get("AZURE_API_VERSION")
-    AZURE_ENDPOINT = os.environ.get("AZURE_API_BASE")
-    if API_KEY is not None:
-        default_client = AzureOpenAI(
-            api_key=API_KEY,
-            api_version=API_VERSION,
-            azure_endpoint=AZURE_ENDPOINT,
-        )
+def get_config():
+    API_KEY = None
+    # OpenAI, Azure, or LiteLLM API configuration
+    if os.environ.get("OPENAI_API_KEY"):
+        API_KEY = os.environ.get("OPENAI_API_KEY")
+        default_client = OpenAI(api_key=API_KEY)
+    elif os.environ.get("AZURE_OPENAI_API_KEY"):
+        API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
+        API_VERSION = os.environ.get("AZURE_API_VERSION")
+        AZURE_ENDPOINT = os.environ.get("AZURE_API_BASE")
+        if API_KEY is not None:
+            default_client = AzureOpenAI(
+                api_key=API_KEY,
+                api_version=API_VERSION,
+                azure_endpoint=AZURE_ENDPOINT,
+            )
+        else:
+            from azure.identity import DefaultAzureCredential, get_bearer_token_provider
+            azure_credential = DefaultAzureCredential()
+            token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
+            default_client = AzureOpenAI(
+                api_version=API_VERSION,
+                azure_endpoint=AZURE_ENDPOINT,
+                azure_ad_token_provider=token_provider
+            )
     else:
-        from azure.identity import DefaultAzureCredential, get_bearer_token_provider
-        azure_credential = DefaultAzureCredential()
-        token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
-        default_client = AzureOpenAI(
-            api_version=API_VERSION,
-            azure_endpoint=AZURE_ENDPOINT,
-            azure_ad_token_provider=token_provider
-        )
-else:
-    default_client = LiteLLMWrapper()
+        default_client = LiteLLMWrapper()
+    return default_client, API_KEY
 
 # Server configuration
 server_config = {
@@ -254,6 +257,14 @@ def check_api_key():
 def proxy():
     logger.info('Received request to /v1/chat/completions')
     data = request.get_json()
+    # A client may send its own API key as "Authorization: Bearer <key>".
+    auth_header = request.headers.get("Authorization")
+    bearer_token = ""
+    if auth_header and auth_header.startswith("Bearer "):
+        # Strip only the leading scheme; the remainder is the token itself.
+        bearer_token = auth_header[len("Bearer "):].strip()
+        # Never write the credential to the logs; record only that one arrived.
+        logger.debug("Intercepted Bearer Token (value redacted)")
     logger.debug(f'Request data: {data}')
 
     stream = data.get('stream', False)
@@ -272,15 +283,20 @@ def proxy():
         model = f"{optillm_approach}-{model}"
 
     base_url = server_config['base_url']
-
-    if base_url != "":
-        client = OpenAI(api_key=API_KEY, base_url=base_url)
-    else:
-        client = default_client
+    default_client, api_key = get_config()
 
     operation, approaches, model = parse_combined_approach(model, known_approaches, plugin_approaches)
     logger.info(f'Using approach(es) {approaches}, operation {operation}, with model {model}')
 
+    # Client precedence: caller-supplied OpenAI key, then the configured
+    # base_url with the server's key, then the environment's default client.
+    if bearer_token.startswith("sk-") and model.startswith("gpt"):
+        api_key = bearer_token
+        client = OpenAI(api_key=api_key, base_url=base_url or None)
+    elif base_url != "" and api_key:
+        client = OpenAI(api_key=api_key, base_url=base_url)
+    else:
+        client = default_client
     try:
         if operation == 'SINGLE':
             final_response, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)
@@ -333,7 +349,7 @@ def proxy():
 @app.route('/v1/models', methods=['GET'])
 def proxy_models():
     logger.info('Received request to /v1/models')
-    
+    default_client, API_KEY = get_config()
     try:
         if server_config['base_url']:
             client = OpenAI(api_key=API_KEY, base_url=server_config['base_url'])
diff --git a/optillm/plugins/privacy_plugin.py b/optillm/plugins/privacy_plugin.py
new file mode 100644
index 0000000..bd12d74
--- /dev/null
+++ b/optillm/plugins/privacy_plugin.py
@@ -0,0 +1,135 @@
+import spacy
+from presidio_analyzer import AnalyzerEngine
+from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine, OperatorConfig
+from presidio_anonymizer.operators import Operator, OperatorType
+
+from typing import Dict, Tuple
+
+SLUG = "privacy"
+
+class InstanceCounterAnonymizer(Operator):
+    """
+    Presidio operator that replaces each detected entity value with a
+    per-type numbered placeholder such as `<PERSON_0>`.
+    """
+
+    REPLACING_FORMAT = "<{entity_type}_{index}>"
+
+    def operate(self, text: str, params: Dict = None) -> str:
+        """Return the placeholder for `text`, reusing it if this value was seen before."""
+        # `params` must carry "entity_type" and a mutable "entity_mapping" (see validate).
+        entity_type: str = params["entity_type"]
+
+        # entity_mapping is a dict of dicts containing mappings per entity type
+        entity_mapping: Dict[str, Dict[str, str]] = params["entity_mapping"]
+
+        entity_mapping_for_type = entity_mapping.get(entity_type)
+        if not entity_mapping_for_type:
+            new_text = self.REPLACING_FORMAT.format(
+                entity_type=entity_type, index=0
+            )
+            entity_mapping[entity_type] = {}
+
+        else:
+            if text in entity_mapping_for_type:
+                return entity_mapping_for_type[text]
+
+            previous_index = self._get_last_index(entity_mapping_for_type)
+            new_text = self.REPLACING_FORMAT.format(
+                entity_type=entity_type, index=previous_index + 1
+            )
+        # Record the new placeholder so later occurrences of this value reuse it.
+        entity_mapping[entity_type][text] = new_text
+        return new_text
+
+    @staticmethod
+    def _get_last_index(entity_mapping_for_type: Dict) -> int:
+        """Return the highest placeholder index already issued for this entity type."""
+        # Placeholders look like "<TYPE_N>": take the text after the last "_" and drop the ">".
+        def get_index(value: str) -> int:
+            return int(value.split("_")[-1][:-1])
+
+        indices = [get_index(v) for v in entity_mapping_for_type.values()]
+        return max(indices)
+
+    def validate(self, params: Dict = None) -> None:
+        """Raise ValueError unless `params` has both `entity_mapping` and `entity_type`."""
+
+        if "entity_mapping" not in params:
+            raise ValueError("An input Dict called `entity_mapping` is required.")
+        if "entity_type" not in params:
+            raise ValueError("An entity_type param is required.")
+
+    def operator_name(self) -> str:
+        return "entity_counter"  # the name the OperatorConfig in run() refers to
+
+    def operator_type(self) -> OperatorType:
+        return OperatorType.Anonymize
+
+def download_model(model_name):
+    """Download the spaCy package `model_name` unless it is already installed."""
+    installed = spacy.util.is_package(model_name)
+    print(f"{model_name} model already downloaded." if installed else f"Downloading {model_name} model...")
+    if not installed:
+        spacy.cli.download(model_name)
+
+def replace_entities(entity_map, text):
+    """Swap each `<TYPE_N>` placeholder in `text` back to the value it replaced.
+
+    `entity_map` maps entity type -> {original value: placeholder}. Placeholders
+    that have no entry in the map are left in the text unchanged.
+    """
+    import re
+    # Flatten the per-type tables into one placeholder -> original lookup.
+    originals = {
+        token: value
+        for per_type in entity_map.values()
+        for value, token in per_type.items()
+    }
+
+    def restore(found):
+        return originals.get(found.group(0), found.group(0))
+
+    return re.sub(r'<[A-Z_]+_\d+>', restore, text)
+
+def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]:
+    """Anonymize the query with Presidio, call the model, then restore the original values in the reply."""
+    model_name = "en_core_web_lg"
+    download_model(model_name)
+
+    analyzer = AnalyzerEngine()
+    analyzer_results = analyzer.analyze(text=initial_query, language="en")
+
+    # Create Anonymizer engine and add the custom anonymizer
+    anonymizer_engine = AnonymizerEngine()
+    anonymizer_engine.add_anonymizer(InstanceCounterAnonymizer)
+
+    # Filled in by InstanceCounterAnonymizer: entity type -> {original value: placeholder}
+    entity_mapping = dict()
+
+    # Anonymize the text
+    anonymized_result = anonymizer_engine.anonymize(
+        initial_query,
+        analyzer_results,
+        {
+            "DEFAULT": OperatorConfig(
+                "entity_counter", {"entity_mapping": entity_mapping}
+            )
+        },
+    )
+    # NOTE(review): only the user query is anonymized; system_prompt is sent as-is.
+
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": anonymized_result.text}],
+    )
+
+    # The reply may still contain placeholders at this point.
+    final_response = response.choices[0].message.content.strip()
+
+    # Map any placeholders in the reply back to the values they replaced.
+    final_response = replace_entities(entity_mapping, final_response)
+
+    return final_response, response.usage.completion_tokens
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 47c69eb..12b64c8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,6 @@ scikit-learn
 litellm
 requests
 beautifulsoup4
-lxml
\ No newline at end of file
+lxml
+presidio_analyzer
+presidio_anonymizer
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 8097c20..145f4f1 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,8 @@
         "requests",
         "beautifulsoup4",
         "lxml",
+        "presidio_analyzer",
+        "presidio_anonymizer"
     ],
     author="codelion",
     author_email="codelion@okyasoft.com",

From ef827457d27f785161c1ace6b20c5474aaf20502 Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Wed, 2 Oct 2024 02:53:00 -0700
Subject: [PATCH 4/4] Create train_optillm_classifier.py

---
 scripts/train_optillm_classifier.py | 113 ++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 scripts/train_optillm_classifier.py

diff --git a/scripts/train_optillm_classifier.py b/scripts/train_optillm_classifier.py
new file mode 100644
index 0000000..95d10ca
--- /dev/null
+++ b/scripts/train_optillm_classifier.py
@@ -0,0 +1,113 @@
+import json
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.preprocessing import LabelEncoder
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report
+import joblib
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def load_data(file_path):
+    """Read a JSON Lines file and return its records as a list (blank lines are skipped)."""
+    with open(file_path, 'r', encoding='utf-8') as f:
+        # A blank separator or trailing empty line is not a record; json.loads would reject it.
+        records = [json.loads(line) for line in f if line.strip()]
+    return records
+
+def preprocess_data(data):
+    """Return (prompts, labels): each prompt paired with the approach of its lowest-'rank' result."""
+    X, y = [], []
+    for item in data:
+        try:  # every lookup sits inside the try so one malformed item is skipped, not fatal
+            prompt = item['prompt']
+            best_approach = min(item['results'], key=lambda x: x.get('rank', float('inf')))['approach']
+        except (KeyError, ValueError) as e:
+            logger.warning(f"Error encountered: {e}. Skipping this item.")
+            logger.debug(f"Problematic item: {item}")
+            continue
+        X.append(prompt)
+        y.append(best_approach)
+    return X, y
+
+def extract_features(X):
+    """Fit a TF-IDF vectorizer (at most 1000 features) on `X`; return (feature matrix, fitted vectorizer)."""
+    tfidf = TfidfVectorizer(max_features=1000)
+    return tfidf.fit_transform(X), tfidf
+
+def train_model(X_train, y_train):
+    """Fit a 100-tree random forest (random_state 42) on the training split and return it."""
+    # fit() returns the fitted estimator itself, so the call can be returned directly.
+    return RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
+
+def evaluate_model(model, X_test, y_test):
+    """Print a classification report for `model`'s predictions on the held-out split."""
+    print(classification_report(y_test, model.predict(X_test), zero_division=1))
+
+def select_approach(model, vectorizer, prompt, effort, approaches, token_usage):
+    """Choose an approach for `prompt`; `effort` 1 favours predicted quality, 0 favours low token cost."""
+    probabilities = model.predict_proba(vectorizer.transform([prompt]))[0]
+    sorted_approaches = sorted(zip(approaches, probabilities), key=lambda x: x[1], reverse=True)
+    if effort == 1 or not token_usage:
+        # No cost data (or cost ignored): take the most probable approach.
+        return sorted_approaches[0][0]
+    if effort == 0:
+        return min(token_usage, key=token_usage.get)
+
+    # Min-max normalise cost; a zero span (all costs equal) would divide by zero.
+    lowest = min(token_usage.values())
+    span = (max(token_usage.values()) - lowest) or 1
+    scores = []
+    for approach, prob in sorted_approaches:
+        if approach in token_usage:
+            scores.append((approach, effort * prob + (1 - effort) * (1 - (token_usage[approach] - lowest) / span)))
+    return max(scores, key=lambda x: x[1])[0] if scores else sorted_approaches[0][0]
+
+def main():
+    """Train and persist the approach classifier, then print one example selection."""
+    data = load_data('optillm_dataset_1.jsonl')
+    X, y = preprocess_data(data)
+    if not X or not y:
+        logger.error("No valid data after preprocessing. Check your dataset.")
+        return
+
+    X_features, vectorizer = extract_features(X)
+    # Encode approach names as integer class ids for the classifier.
+    label_encoder = LabelEncoder()
+    y_encoded = label_encoder.fit_transform(y)
+
+    X_train, X_test, y_train, y_test = train_test_split(X_features, y_encoded, test_size=0.2, random_state=42)
+
+    model = train_model(X_train, y_train)
+    evaluate_model(model, X_test, y_test)
+
+    # Persist everything needed to score a new prompt later.
+    joblib.dump(model, 'optillm_approach_classifier.joblib')
+    joblib.dump(vectorizer, 'optillm_vectorizer.joblib')
+    joblib.dump(label_encoder, 'optillm_label_encoder.joblib')
+
+    # Collect the token counts reported for each approach across the dataset.
+    token_usage = {approach: [] for approach in set(y)}
+    for item in data:
+        # Tolerate records without a 'results' list rather than raising KeyError.
+        for result in item.get('results', []):
+            approach = result.get('approach')
+            tokens = result.get('tokens')
+            if approach and tokens is not None:
+                token_usage.setdefault(approach, []).append(tokens)
+    avg_token_usage = {approach: np.mean(usage) if usage else 0 for approach, usage in token_usage.items()}
+
+    prompt = "Write a Python function to calculate the Fibonacci sequence."
+    effort = 0.1
+    # predict_proba columns follow model.classes_, which omits any class absent
+    # from the training split; decode those ids so names and columns line up.
+    approaches = label_encoder.inverse_transform(model.classes_)
+    selected_approach = select_approach(model, vectorizer, prompt, effort, approaches, avg_token_usage)
+    print(f"Selected approach for prompt '{prompt}' with effort {effort}: {selected_approach}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file