
Commit

Merge pull request #35 from rmusser01/main
Fix for Ollama (tested)
rmusser01 authored Oct 13, 2024
2 parents d28ecd0 + 82aef0e commit 9503e7f
Showing 7 changed files with 195 additions and 76 deletions.
2 changes: 1 addition & 1 deletion App_Function_Libraries/Chat.py
@@ -94,7 +94,7 @@ def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_messag
         response = chat_with_huggingface(api_key, input_data, prompt, temp)  # , system_message)

     elif api_endpoint.lower() == "ollama":
-        response = chat_with_ollama(input_data, prompt, temp, system_message)
+        response = chat_with_ollama(input_data, prompt, None, api_key, temp, system_message)

     elif api_endpoint.lower() == "aphrodite":
         response = chat_with_aphrodite(input_data, prompt, temp, system_message)
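The call-site fix above adds two arguments: callers now pass None for the new api_url parameter (so chat_with_ollama falls back to the configured endpoint) and forward api_key. A minimal sketch of how a caller might exercise the updated signature; the import path is taken from this repository, while the argument values are illustrative placeholders:

# Sketch: calling the updated chat_with_ollama (values are illustrative).
from App_Function_Libraries.LLM_API_Calls_Local import chat_with_ollama

response = chat_with_ollama(
    input_data="Explain retrieval-augmented generation in two sentences.",
    custom_prompt="Answer concisely.",
    api_url=None,  # None defers to the URL in the config file
    api_key=None,  # a default local Ollama install needs no key
    temp=0.7,
    system_message="You are a helpful AI assistant.",
)
print(response)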
2 changes: 1 addition & 1 deletion App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py
@@ -194,7 +194,7 @@ def summarize_explain_text(message, api_endpoint, api_key, summarization, explan
             summarization_response = summarize_with_huggingface(api_key, input_data, user_prompt,
                                                                 temp)  # , system_prompt)
         elif api_endpoint.lower() == "ollama":
-            summarization_response = summarize_with_ollama(input_data, user_prompt, temp, system_prompt)
+            summarization_response = summarize_with_ollama(input_data, user_prompt, None, api_key, temp, system_prompt)
         else:
             raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
     except Exception as e:
128 changes: 95 additions & 33 deletions App_Function_Libraries/LLM_API_Calls_Local.py
@@ -4,6 +4,7 @@
 # This library is used to perform summarization with a 'local' inference engine.
 #
 ####
+import logging
 from typing import Union

 ####################
@@ -149,7 +150,7 @@ def chat_with_llama(input_data, custom_prompt, temp, api_url="http://127.0.0.1:8
         #'min_p': '0.05',
         #'n_predict': '-1',
         #'n_keep': '0',
-        #'stream': 'false',
+        'stream': 'True',
         #'stop': '["\n"]',
         #'tfs_z': '1.0',
         #'repeat_penalty': '1.1',
@@ -432,10 +433,19 @@ def chat_with_aphrodite(input_data, custom_prompt_input, api_key=None, api_IP="h
         return "Error summarizing with Aphrodite."


-# FIXME
-def chat_with_ollama(input_data, custom_prompt, api_url="http://127.0.0.1:11434/api/chat", api_key=None, temp=None, system_message=None, model=None):
+def chat_with_ollama(
+        input_data,
+        custom_prompt,
+        api_url="http://127.0.0.1:11434/v1/chat/completions",
+        api_key=None,
+        temp=None,
+        system_message=None,
+        model=None,
+        max_retries=5,
+        retry_delay=20
+):
     try:
-        logging.debug("ollama: Loading and validating configurations")
+        logging.debug("Ollama: Loading and validating configurations")
         loaded_config_data = load_and_log_configs()
         if loaded_config_data is None:
             logging.error("Failed to load configuration data")
@@ -453,7 +463,19 @@ def chat_with_ollama(input_data, custom_prompt, api_url="http://127.0.0.1:11434/
         else:
             logging.warning("Ollama: No API key found in config file")

-        model = loaded_config_data['models']['ollama']
+        # Set model from parameter or config
+        if model is None:
+            model = loaded_config_data['models'].get('ollama')
+            if model is None:
+                logging.error("Ollama: Model not found in config file")
+                return "Ollama: Model not found in config file"
+
+        # Set api_url from parameter or config
+        if api_url is None:
+            api_url = loaded_config_data['local_api_ip'].get('ollama')
+            if api_url is None:
+                logging.error("Ollama: API URL not found in config file")
+                return "Ollama: API URL not found in config file"

         # Load transcript
         logging.debug("Ollama: Loading JSON data")
@@ -486,48 +508,88 @@ def chat_with_ollama(input_data, custom_prompt, api_url="http://127.0.0.1:11434/
             'accept': 'application/json',
             'content-type': 'application/json',
         }
-        if len(ollama_api_key) > 5:
+        if ollama_api_key and len(ollama_api_key) > 5:
            headers['Authorization'] = f'Bearer {ollama_api_key}'

-        ollama_prompt = f"{custom_prompt} \n\n\n\n{text}"
-        if system_message is None:
-            system_message = "You are a helpful AI assistant."
-        logging.debug(f"llama: Prompt being sent is {ollama_prompt}")
+        ollama_prompt = f"{custom_prompt}\n\n{text}"
+        if system_message is None:
+            system_message = "You are a helpful AI assistant."
+        logging.debug(f"Ollama: Prompt being sent is: {ollama_prompt}")

-        data = {
+        data_payload = {
             "model": model,
             "messages": [
-                {"role": "system",
-                 "content": system_message
-                 },
-                {"role": "user",
-                 "content": ollama_prompt
-                 }
+                {
+                    "role": "system",
+                    "content": system_message
+                },
+                {
+                    "role": "user",
+                    "content": ollama_prompt
+                }
             ],
         }

-        logging.debug("Ollama: Submitting request to API endpoint")
-        print("Ollama: Submitting request to API endpoint")
-        response = requests.post(api_url, headers=headers, json=data)
-        response_data = response.json()
-        logging.debug("API Response Data: %s", response_data)
+        for attempt in range(1, max_retries + 1):
+            logging.debug("Ollama: Submitting request to API endpoint")
+            print("Ollama: Submitting request to API endpoint")
+            try:
+                response = requests.post(api_url, headers=headers, json=data_payload, timeout=30)
+                response.raise_for_status()  # Raises HTTPError for bad responses
+                response_data = response.json()
+            except requests.exceptions.Timeout:
+                logging.error("Ollama: Request timed out.")
+                return "Ollama: Request timed out."
+            except requests.exceptions.HTTPError as http_err:
+                logging.error(f"Ollama: HTTP error occurred: {http_err}")
+                return f"Ollama: HTTP error occurred: {http_err}"
+            except requests.exceptions.RequestException as req_err:
+                logging.error(f"Ollama: Request exception: {req_err}")
+                return f"Ollama: Request exception: {req_err}"
+            except json.JSONDecodeError:
+                logging.error("Ollama: Failed to decode JSON response")
+                return "Ollama: Failed to decode JSON response."
+            except Exception as e:
+                logging.error(f"Ollama: An unexpected error occurred: {str(e)}")
+                return f"Ollama: An unexpected error occurred: {str(e)}"
+
+            logging.debug(f"API Response Data: {response_data}")

-        if response.status_code == 200:
-            # if 'X' in response_data:
-            logging.debug(response_data)
-            summary = response_data['message']['content'].strip()
-            logging.debug("Ollama: Chat request successful")
-            print("\n\nChat request successful.")
-            return summary
-        else:
-            logging.error(f"\n\nOllama: API request failed with status code {response.status_code}: {response.text}")
-            return f"Ollama: API request failed: {response.text}"
+            if response.status_code == 200:
+                # Inspect available keys
+                available_keys = list(response_data.keys())
+                logging.debug(f"Ollama: Available keys in response: {available_keys}")
+
+                # Attempt to retrieve 'response'
+                summary = None
+                if 'response' in response_data and response_data['response']:
+                    summary = response_data['response'].strip()
+                elif 'choices' in response_data and len(response_data['choices']) > 0:
+                    choice = response_data['choices'][0]
+                    if 'message' in choice and 'content' in choice['message']:
+                        summary = choice['message']['content'].strip()
+
+                if summary:
+                    logging.debug("Ollama: Chat request successful")
+                    print("\n\nChat request successful.")
+                    return summary
+                elif response_data.get('done_reason') == 'load':
+                    logging.warning(f"Ollama: Model is loading. Attempt {attempt} of {max_retries}. Retrying in {retry_delay} seconds...")
+                    time.sleep(retry_delay)
+                else:
+                    logging.error("Ollama: API response does not contain 'response' or 'choices'.")
+                    return "Ollama: API response does not contain 'response' or 'choices'."
+            else:
+                logging.error(f"Ollama: API request failed with status code {response.status_code}: {response.text}")
+                return f"Ollama: API request failed: {response.text}"
+
+        logging.error("Ollama: Maximum retry attempts reached. Model is still loading.")
+        return "Ollama: Maximum retry attempts reached. Model is still loading."

     except Exception as e:
         logging.error("\n\nOllama: Error in processing: %s", str(e))
-        return f"Ollama: Error occurred while processing summary with ollama: {str(e)}"
+        return f"Ollama: Error occurred while processing summary with Ollama: {str(e)}"


 def chat_with_vllm(
         input_data: Union[str, dict, list],
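The heart of this change is the retry loop: when Ollama answers with done_reason == 'load', the model is still being loaded into memory, so the request is retried after a delay instead of failing outright. A self-contained sketch of that pattern, using a hypothetical fetch callable in place of the real requests.post call:

import time

def call_with_load_retries(fetch, max_retries=5, retry_delay=20):
    # `fetch` is a hypothetical stand-in returning a parsed JSON dict;
    # the done_reason == 'load' convention mirrors Ollama's response field.
    for attempt in range(1, max_retries + 1):
        data = fetch()
        if data.get('done_reason') == 'load':
            print(f"Model loading; attempt {attempt} of {max_retries}, retrying in {retry_delay}s...")
            time.sleep(retry_delay)
            continue
        return data
    raise TimeoutError("Model did not finish loading within the retry budget.")

# Example with a stub backend that reports 'load' once, then answers:
calls = iter([{'done_reason': 'load'}, {'response': 'done'}])
print(call_with_load_retries(lambda: next(calls), retry_delay=0))  # {'response': 'done'}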
2 changes: 1 addition & 1 deletion App_Function_Libraries/RAG/RAG_Libary_2.py
@@ -263,7 +263,7 @@ def generate_answer(api_choice: str, context: str, query: str) -> str:

     elif api_choice.lower() == "ollama":
         from App_Function_Libraries.Summarization.Local_Summarization_Lib import summarize_with_ollama
-        return summarize_with_ollama(prompt, "", config['Local-API']['ollama_api_key'], None, None, None)
+        return summarize_with_ollama(prompt, "", config['Local-API']['ollama_api_IP'], config['Local-API']['ollama_api_key'], None, None, None)

     elif api_choice.lower() == "custom_openai_api":
         logging.debug(f"RAG Answer Gen: Trying with Custom_OpenAI API")
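Since generate_answer passes every argument positionally, inserting ollama_api_IP shifts the rest into place against the new signature (input_data, custom_prompt, api_url, api_key, temp, system_message, model). A sketch of the same call spelled out with comments; the config keys mirror the diff above, but the values here are illustrative placeholders:

from App_Function_Libraries.Summarization.Local_Summarization_Lib import summarize_with_ollama

config = {'Local-API': {'ollama_api_IP': 'http://127.0.0.1:11434/v1/chat/completions',
                        'ollama_api_key': ''}}
prompt = "Query plus retrieved context..."
answer = summarize_with_ollama(
    prompt,                                 # input_data
    "",                                     # custom_prompt
    config['Local-API']['ollama_api_IP'],   # api_url
    config['Local-API']['ollama_api_key'],  # api_key
    None,                                   # temp
    None,                                   # system_message
    None,                                   # model
)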
133 changes: 95 additions & 38 deletions App_Function_Libraries/Summarization/Local_Summarization_Lib.py
@@ -21,6 +21,7 @@
 import json
 import logging
 import os
+import time
 from typing import Union

 import requests
@@ -640,10 +641,19 @@ def summarize_with_vllm(
         return f"Error: Unexpected error during vLLM summarization - {str(e)}"


-# FIXME - update to be a summarize request
-def summarize_with_ollama(input_data, custom_prompt, api_key=None, temp=None, system_message=None, model=None, api_url="http://127.0.0.1:11434/api/generate",):
+def summarize_with_ollama(
+        input_data,
+        custom_prompt,
+        api_url="http://127.0.0.1:11434/v1/chat/completions",
+        api_key=None,
+        temp=None,
+        system_message=None,
+        model=None,
+        max_retries=5,
+        retry_delay=20
+):
     try:
-        logging.debug("ollama: Loading and validating configurations")
+        logging.debug("Ollama: Loading and validating configurations")
         loaded_config_data = load_and_log_configs()
         if loaded_config_data is None:
             logging.error("Failed to load configuration data")
@@ -661,7 +671,19 @@ def summarize_with_ollama(input_data, custom_prompt, api_key=None, temp=None, sy
         else:
             logging.warning("Ollama: No API key found in config file")

-        model = loaded_config_data['models']['ollama']
+        # Set model from parameter or config
+        if model is None:
+            model = loaded_config_data['models'].get('ollama')
+            if model is None:
+                logging.error("Ollama: Model not found in config file")
+                return "Ollama: Model not found in config file"
+
+        # Set api_url from parameter or config
+        if api_url is None:
+            api_url = loaded_config_data['local_api_ip'].get('ollama')
+            if api_url is None:
+                logging.error("Ollama: API URL not found in config file")
+                return "Ollama: API URL not found in config file"

         # Load transcript
         logging.debug("Ollama: Loading JSON data")
@@ -690,57 +712,92 @@ def summarize_with_ollama(input_data, custom_prompt, api_key=None, temp=None, sy
         else:
             raise ValueError("Ollama: Invalid input data format")

+        if custom_prompt is None:
+            custom_prompt = f"{summarizer_prompt}\n\n\n\n{text}"
+        else:
+            custom_prompt = f"{custom_prompt}\n\n\n\n{text}"
+
         headers = {
             'accept': 'application/json',
             'content-type': 'application/json',
         }
-        if len(ollama_api_key) > 5:
+        if ollama_api_key and len(ollama_api_key) > 5:
             headers['Authorization'] = f'Bearer {ollama_api_key}'

-        ollama_prompt = f"{custom_prompt} \n\n\n\n{text}"
-        if system_message is None:
-            system_message = "You are a helpful AI assistant."
-        logging.debug(f"llama: Prompt being sent is {ollama_prompt}")
+        ollama_prompt = f"{custom_prompt}\n\n{text}"
+        if system_message is None:
+            system_message = "You are a helpful AI assistant."
+        logging.debug(f"Ollama: Prompt being sent is: {ollama_prompt}")

-        data = {
+        data_payload = {
             "model": model,
             "messages": [
-                {"role": "system",
-                 "content": system_message
-                 },
-                {"role": "user",
-                 "content": ollama_prompt
-                 }
+                {
+                    "role": "system",
+                    "content": system_message
+                },
+                {
+                    "role": "user",
+                    "content": ollama_prompt
+                }
             ],
             'temperature': temp
         }

-        logging.debug("Ollama: Submitting request to API endpoint")
-        print("Ollama: Submitting request to API endpoint")
-        response = requests.post(api_url, headers=headers, json=data)
-        response_data = response.json()
-        logging.debug("API Response Data: %s", response_data)
+        for attempt in range(1, max_retries + 1):
+            logging.debug("Ollama: Submitting request to API endpoint")
+            print("Ollama: Submitting request to API endpoint")
+            try:
+                response = requests.post(api_url, headers=headers, json=data_payload, timeout=30)
+                response.raise_for_status()  # Raises HTTPError for bad responses
+                response_data = response.json()
+            except requests.exceptions.Timeout:
+                logging.error("Ollama: Request timed out.")
+                return "Ollama: Request timed out."
+            except requests.exceptions.HTTPError as http_err:
+                logging.error(f"Ollama: HTTP error occurred: {http_err}")
+                return f"Ollama: HTTP error occurred: {http_err}"
+            except requests.exceptions.RequestException as req_err:
+                logging.error(f"Ollama: Request exception: {req_err}")
+                return f"Ollama: Request exception: {req_err}"
+            except json.JSONDecodeError:
+                logging.error("Ollama: Failed to decode JSON response")
+                return "Ollama: Failed to decode JSON response."
+            except Exception as e:
+                logging.error(f"Ollama: An unexpected error occurred: {str(e)}")
+                return f"Ollama: An unexpected error occurred: {str(e)}"
+
+            logging.debug(f"API Response Data: {response_data}")

-        if response.status_code == 200:
-            # if 'X' in response_data:
-            logging.debug(response_data)
-            summary = response_data['content'].strip()
-            logging.debug("Ollama: Summarization successful")
-            print("Summarization successful.")
-            return summary
-        else:
-            logging.error(f"Ollama: API request failed with status code {response.status_code}: {response.text}")
-            return f"Ollama: API request failed: {response.text}"
+            if response.status_code == 200:
+                # Inspect available keys
+                available_keys = list(response_data.keys())
+                logging.debug(f"Ollama: Available keys in response: {available_keys}")
+
+                # Attempt to retrieve 'response'
+                summary = None
+                if 'response' in response_data and response_data['response']:
+                    summary = response_data['response'].strip()
+                elif 'choices' in response_data and len(response_data['choices']) > 0:
+                    choice = response_data['choices'][0]
+                    if 'message' in choice and 'content' in choice['message']:
+                        summary = choice['message']['content'].strip()
+
+                if summary:
+                    logging.debug("Ollama: Chat request successful")
+                    print("\n\nChat request successful.")
+                    return summary
+                elif response_data.get('done_reason') == 'load':
+                    logging.warning(f"Ollama: Model is loading. Attempt {attempt} of {max_retries}. Retrying in {retry_delay} seconds...")
+                    time.sleep(retry_delay)
+                else:
+                    logging.error("Ollama: API response does not contain 'response' or 'choices'.")
+                    return "Ollama: API response does not contain 'response' or 'choices'."
+            else:
+                logging.error(f"Ollama: API request failed with status code {response.status_code}: {response.text}")
+                return f"Ollama: API request failed: {response.text}"
+
+        logging.error("Ollama: Maximum retry attempts reached. Model is still loading.")
+        return "Ollama: Maximum retry attempts reached. Model is still loading."

     except Exception as e:
-        logging.error("Ollama: Error in processing: %s", str(e))
-        return f"Ollama: Error occurred while processing summary with ollama: {str(e)}"
+        logging.error("\n\nOllama: Error in processing: %s", str(e))
+        return f"Ollama: Error occurred while processing summary with Ollama: {str(e)}"


 # FIXME - update to be a summarize request
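Both rewritten functions now accept either response shape: Ollama's native {'response': ...} field, or the OpenAI-compatible {'choices': [{'message': {'content': ...}}]} served by the /v1/chat/completions route that the new default URL points at. A standalone helper sketching the same key-probing logic as the diff:

def extract_summary(response_data: dict):
    # Probe Ollama's native 'response' field first, then the
    # OpenAI-compatible 'choices' list; None means neither matched
    # (the caller decides whether to retry or give up).
    if response_data.get('response'):
        return response_data['response'].strip()
    choices = response_data.get('choices') or []
    if choices and 'content' in choices[0].get('message', {}):
        return choices[0]['message']['content'].strip()
    return None

# Works against both shapes:
print(extract_summary({'response': ' native shape '}))                        # native shape
print(extract_summary({'choices': [{'message': {'content': ' openai '}}]}))  # openai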
@@ -89,7 +89,7 @@ def summarize(
     elif api_name.lower() == "custom-openai":
         return summarize_with_custom_openai(api_key, input_data, custom_prompt_arg, temp, system_message)
     elif api_name.lower() == "ollama":
-        return summarize_with_ollama(input_data, custom_prompt_arg, api_key, temp, system_message)
+        return summarize_with_ollama(input_data, custom_prompt_arg, None, api_key, temp, system_message)
     else:
         return f"Error: Invalid API Name {api_name}"

