
Commit

Merge pull request #35 from rmusser01/main
Fix for Ollama (tested)
rmusser01 authored Oct 13, 2024
2 parents d28ecd0 + 82aef0e commit 9503e7f
Showing 7 changed files with 195 additions and 76 deletions.
2 changes: 1 addition & 1 deletion App_Function_Libraries/Chat.py
@@ -94,7 +94,7 @@ def chat_api_call(api_endpoint, api_key, input_data, prompt, temp, system_messag
         response = chat_with_huggingface(api_key, input_data, prompt, temp)  # , system_message)

     elif api_endpoint.lower() == "ollama":
-        response = chat_with_ollama(input_data, prompt, temp, system_message)
+        response = chat_with_ollama(input_data, prompt, None, api_key, temp, system_message)

     elif api_endpoint.lower() == "aphrodite":
         response = chat_with_aphrodite(input_data, prompt, temp, system_message)
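The call-site fix above adds two arguments: callers now pass None for the new api_url parameter (so chat_with_ollama falls back to the configured endpoint) and forward api_key. A minimal sketch of how a caller might exercise the updated signature; the import path is taken from this repository, while the argument values are illustrative placeholders:

# Sketch: calling the updated chat_with_ollama (values are illustrative).
from App_Function_Libraries.LLM_API_Calls_Local import chat_with_ollama

response = chat_with_ollama(
    input_data="Explain retrieval-augmented generation in two sentences.",
    custom_prompt="Answer concisely.",
    api_url=None,  # None defers to the URL in the config file
    api_key=None,  # a default local Ollama install needs no key
    temp=0.7,
    system_message="You are a helpful AI assistant.",
)
print(response)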
2 changes: 1 addition & 1 deletion App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py
@@ -194,7 +194,7 @@ def summarize_explain_text(message, api_endpoint, api_key, summarization, explan
             summarization_response = summarize_with_huggingface(api_key, input_data, user_prompt,
                                                                 temp)  # , system_prompt)
         elif api_endpoint.lower() == "ollama":
-            summarization_response = summarize_with_ollama(input_data, user_prompt, temp, system_prompt)
+            summarization_response = summarize_with_ollama(input_data, user_prompt, None, api_key, temp, system_prompt)
         else:
             raise ValueError(f"Unsupported API endpoint: {api_endpoint}")
     except Exception as e:
128 changes: 95 additions & 33 deletions App_Function_Libraries/LLM_API_Calls_Local.py
@@ -4,6 +4,7 @@
 # This library is used to perform summarization with a 'local' inference engine.
 #
 ####
+import logging
 from typing import Union

 ####################
@@ -149,7 +150,7 @@ def chat_with_llama(input_data, custom_prompt, temp, api_url="http://127.0.0.1:8
         #'min_p': '0.05',
         #'n_predict': '-1',
         #'n_keep': '0',
-        #'stream': 'false',
+        'stream': 'True',
         #'stop': '["\n"]',
         #'tfs_z': '1.0',
         #'repeat_penalty': '1.1',
@@ -432,10 +433,19 @@ def chat_with_aphrodite(input_data, custom_prompt_input, api_key=None, api_IP="h
         return "Error summarizing with Aphrodite."


-# FIXME
-def chat_with_ollama(input_data, custom_prompt, api_url="http://127.0.0.1:11434/api/chat", api_key=None, temp=None, system_message=None, model=None):
+def chat_with_ollama(
+        input_data,
+        custom_prompt,
+        api_url="http://127.0.0.1:11434/v1/chat/completions",
+        api_key=None,
+        temp=None,
+        system_message=None,
+        model=None,
+        max_retries=5,
+        retry_delay=20
+):
     try:
-        logging.debug("ollama: Loading and validating configurations")
+        logging.debug("Ollama: Loading and validating configurations")
         loaded_config_data = load_and_log_configs()
         if loaded_config_data is None:
             logging.error("Failed to load configuration data")
@@ -453,7 +463,19 @@ def chat_with_ollama(input_data, custom_prompt, api_url="http://127.0.0.1:11434/
         else:
             logging.warning("Ollama: No API key found in config file")

-        model = loaded_config_data['models']['ollama']
+        # Set model from parameter or config
+        if model is None:
+            model = loaded_config_data['models'].get('ollama')
+            if model is None:
+                logging.error("Ollama: Model not found in config file")
+                return "Ollama: Model not found in config file"
+
+        # Set api_url from parameter or config
+        if api_url is None:
+            api_url = loaded_config_data['local_api_ip'].get('ollama')
+            if api_url is None:
+                logging.error("Ollama: API URL not found in config file")
+                return "Ollama: API URL not found in config file"

         # Load transcript
         logging.debug("Ollama: Loading JSON data")
@@ -486,48 +508,88 @@ def chat_with_ollama(input_data, custom_prompt, api_url="http://127.0.0.1:11434/
             'accept': 'application/json',
             'content-type': 'application/json',
         }
-        if len(ollama_api_key) > 5:
+        if ollama_api_key and len(ollama_api_key) > 5:
            headers['Authorization'] = f'Bearer {ollama_api_key}'

-        ollama_prompt = f"{custom_prompt} \n\n\n\n{text}"
-        if system_message is None:
-            system_message = "You are a helpful AI assistant."
-        logging.debug(f"llama: Prompt being sent is {ollama_prompt}")
+        ollama_prompt = f"{custom_prompt}\n\n{text}"
+        if system_message is None:
+            system_message = "You are a helpful AI assistant."
+        logging.debug(f"Ollama: Prompt being sent is: {ollama_prompt}")

-        data = {
+        data_payload = {
             "model": model,
             "messages": [
-                {"role": "system",
-                 "content": system_message
-                 },
-                {"role": "user",
-                 "content": ollama_prompt
-                 }
+                {
+                    "role": "system",
+                    "content": system_message
+                },
+                {
+                    "role": "user",
+                    "content": ollama_prompt
+                }
             ],
         }

-        logging.debug("Ollama: Submitting request to API endpoint")
-        print("Ollama: Submitting request to API endpoint")
-        response = requests.post(api_url, headers=headers, json=data)
-        response_data = response.json()
-        logging.debug("API Response Data: %s", response_data)
+        for attempt in range(1, max_retries + 1):
+            logging.debug("Ollama: Submitting request to API endpoint")
+            print("Ollama: Submitting request to API endpoint")
+            try:
+                response = requests.post(api_url, headers=headers, json=data_payload, timeout=30)
+                response.raise_for_status()  # Raises HTTPError for bad responses
+                response_data = response.json()
+            except requests.exceptions.Timeout:
+                logging.error("Ollama: Request timed out.")
+                return "Ollama: Request timed out."
+            except requests.exceptions.HTTPError as http_err:
+                logging.error(f"Ollama: HTTP error occurred: {http_err}")
+                return f"Ollama: HTTP error occurred: {http_err}"
+            except requests.exceptions.RequestException as req_err:
+                logging.error(f"Ollama: Request exception: {req_err}")
+                return f"Ollama: Request exception: {req_err}"
+            except json.JSONDecodeError:
+                logging.error("Ollama: Failed to decode JSON response")
+                return "Ollama: Failed to decode JSON response."
+            except Exception as e:
+                logging.error(f"Ollama: An unexpected error occurred: {str(e)}")
+                return f"Ollama: An unexpected error occurred: {str(e)}"
+
+            logging.debug(f"API Response Data: {response_data}")

-        if response.status_code == 200:
-            # if 'X' in response_data:
-            logging.debug(response_data)
-            summary = response_data['message']['content'].strip()
-            logging.debug("Ollama: Chat request successful")
-            print("\n\nChat request successful.")
-            return summary
-        else:
-            logging.error(f"\n\nOllama: API request failed with status code {response.status_code}: {response.text}")
-            return f"Ollama: API request failed: {response.text}"
+            if response.status_code == 200:
+                # Inspect available keys
+                available_keys = list(response_data.keys())
+                logging.debug(f"Ollama: Available keys in response: {available_keys}")
+
+                # Attempt to retrieve 'response'
+                summary = None
+                if 'response' in response_data and response_data['response']:
+                    summary = response_data['response'].strip()
+                elif 'choices' in response_data and len(response_data['choices']) > 0:
+                    choice = response_data['choices'][0]
+                    if 'message' in choice and 'content' in choice['message']:
+                        summary = choice['message']['content'].strip()
+
+                if summary:
+                    logging.debug("Ollama: Chat request successful")
+                    print("\n\nChat request successful.")
+                    return summary
+                elif response_data.get('done_reason') == 'load':
+                    logging.warning(f"Ollama: Model is loading. Attempt {attempt} of {max_retries}. Retrying in {retry_delay} seconds...")
+                    time.sleep(retry_delay)
+                else:
+                    logging.error("Ollama: API response does not contain 'response' or 'choices'.")
+                    return "Ollama: API response does not contain 'response' or 'choices'."
+            else:
+                logging.error(f"Ollama: API request failed with status code {response.status_code}: {response.text}")
+                return f"Ollama: API request failed: {response.text}"
+
+        logging.error("Ollama: Maximum retry attempts reached. Model is still loading.")
+        return "Ollama: Maximum retry attempts reached. Model is still loading."

     except Exception as e:
         logging.error("\n\nOllama: Error in processing: %s", str(e))
-        return f"Ollama: Error occurred while processing summary with ollama: {str(e)}"
+        return f"Ollama: Error occurred while processing summary with Ollama: {str(e)}"


 def chat_with_vllm(
         input_data: Union[str, dict, list],
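The heart of this change is the retry loop: when Ollama answers with done_reason == 'load', the model is still being loaded into memory, so the request is retried after a delay instead of failing outright. A self-contained sketch of that pattern, using a hypothetical fetch callable in place of the real requests.post call:

import time

def call_with_load_retries(fetch, max_retries=5, retry_delay=20):
    # `fetch` is a hypothetical stand-in returning a parsed JSON dict;
    # the done_reason == 'load' convention mirrors Ollama's response field.
    for attempt in range(1, max_retries + 1):
        data = fetch()
        if data.get('done_reason') == 'load':
            print(f"Model loading; attempt {attempt} of {max_retries}, retrying in {retry_delay}s...")
            time.sleep(retry_delay)
            continue
        return data
    raise TimeoutError("Model did not finish loading within the retry budget.")

# Example with a stub backend that reports 'load' once, then answers:
calls = iter([{'done_reason': 'load'}, {'response': 'done'}])
print(call_with_load_retries(lambda: next(calls), retry_delay=0))  # {'response': 'done'}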
2 changes: 1 addition & 1 deletion App_Function_Libraries/RAG/RAG_Libary_2.py
@@ -263,7 +263,7 @@ def generate_answer(api_choice: str, context: str, query: str) -> str:

     elif api_choice.lower() == "ollama":
         from App_Function_Libraries.Summarization.Local_Summarization_Lib import summarize_with_ollama
-        return summarize_with_ollama(prompt, "", config['Local-API']['ollama_api_key'], None, None, None)
+        return summarize_with_ollama(prompt, "", config['Local-API']['ollama_api_IP'], config['Local-API']['ollama_api_key'], None, None, None)

     elif api_choice.lower() == "custom_openai_api":
         logging.debug(f"RAG Answer Gen: Trying with Custom_OpenAI API")
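Since generate_answer passes every argument positionally, inserting ollama_api_IP shifts the rest into place against the new signature (input_data, custom_prompt, api_url, api_key, temp, system_message, model). A sketch of the same call spelled out with comments; the config keys mirror the diff above, but the values here are illustrative placeholders:

from App_Function_Libraries.Summarization.Local_Summarization_Lib import summarize_with_ollama

config = {'Local-API': {'ollama_api_IP': 'http://127.0.0.1:11434/v1/chat/completions',
                        'ollama_api_key': ''}}
prompt = "Query plus retrieved context..."
answer = summarize_with_ollama(
    prompt,                                 # input_data
    "",                                     # custom_prompt
    config['Local-API']['ollama_api_IP'],   # api_url
    config['Local-API']['ollama_api_key'],  # api_key
    None,                                   # temp
    None,                                   # system_message
    None,                                   # model
)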
133 changes: 95 additions & 38 deletions App_Function_Libraries/Summarization/Local_Summarization_Lib.py
@@ -21,6 +21,7 @@
 import json
 import logging
 import os
+import time
 from typing import Union

 import requests
@@ -640,10 +641,19 @@ def summarize_with_vllm(
         return f"Error: Unexpected error during vLLM summarization - {str(e)}"


-# FIXME - update to be a summarize request
-def summarize_with_ollama(input_data, custom_prompt, api_key=None, temp=None, system_message=None, model=None, api_url="http://127.0.0.1:11434/api/generate",):
+def summarize_with_ollama(
+        input_data,
+        custom_prompt,
+        api_url="http://127.0.0.1:11434/v1/chat/completions",
+        api_key=None,
+        temp=None,
+        system_message=None,
+        model=None,
+        max_retries=5,
+        retry_delay=20
+):
     try:
-        logging.debug("ollama: Loading and validating configurations")
+        logging.debug("Ollama: Loading and validating configurations")
         loaded_config_data = load_and_log_configs()
         if loaded_config_data is None:
             logging.error("Failed to load configuration data")
@@ -661,7 +671,19 @@ def summarize_with_ollama(input_data, custom_prompt, api_key=None, temp=None, sy
         else:
             logging.warning("Ollama: No API key found in config file")

-        model = loaded_config_data['models']['ollama']
+        # Set model from parameter or config
+        if model is None:
+            model = loaded_config_data['models'].get('ollama')
+            if model is None:
+                logging.error("Ollama: Model not found in config file")
+                return "Ollama: Model not found in config file"
+
+        # Set api_url from parameter or config
+        if api_url is None:
+            api_url = loaded_config_data['local_api_ip'].get('ollama')
+            if api_url is None:
+                logging.error("Ollama: API URL not found in config file")
+                return "Ollama: API URL not found in config file"

         # Load transcript
         logging.debug("Ollama: Loading JSON data")
@@ -690,57 +712,92 @@ def summarize_with_ollama(input_data, custom_prompt, api_key=None, temp=None, sy
         else:
             raise ValueError("Ollama: Invalid input data format")

+        if custom_prompt is None:
+            custom_prompt = f"{summarizer_prompt}\n\n\n\n{text}"
+        else:
+            custom_prompt = f"{custom_prompt}\n\n\n\n{text}"
+
         headers = {
             'accept': 'application/json',
             'content-type': 'application/json',
         }
-        if len(ollama_api_key) > 5:
+        if ollama_api_key and len(ollama_api_key) > 5:
             headers['Authorization'] = f'Bearer {ollama_api_key}'

-        ollama_prompt = f"{custom_prompt} \n\n\n\n{text}"
-        if system_message is None:
-            system_message = "You are a helpful AI assistant."
-        logging.debug(f"llama: Prompt being sent is {ollama_prompt}")
+        ollama_prompt = f"{custom_prompt}\n\n{text}"
+        if system_message is None:
+            system_message = "You are a helpful AI assistant."
+        logging.debug(f"Ollama: Prompt being sent is: {ollama_prompt}")

-        data = {
+        data_payload = {
             "model": model,
             "messages": [
-                {"role": "system",
-                 "content": system_message
-                 },
-                {"role": "user",
-                 "content": ollama_prompt
-                 }
+                {
+                    "role": "system",
+                    "content": system_message
+                },
+                {
+                    "role": "user",
+                    "content": ollama_prompt
+                }
             ],
             'temperature': temp
         }

-        logging.debug("Ollama: Submitting request to API endpoint")
-        print("Ollama: Submitting request to API endpoint")
-        response = requests.post(api_url, headers=headers, json=data)
-        response_data = response.json()
-        logging.debug("API Response Data: %s", response_data)
+        for attempt in range(1, max_retries + 1):
+            logging.debug("Ollama: Submitting request to API endpoint")
+            print("Ollama: Submitting request to API endpoint")
+            try:
+                response = requests.post(api_url, headers=headers, json=data_payload, timeout=30)
+                response.raise_for_status()  # Raises HTTPError for bad responses
+                response_data = response.json()
+            except requests.exceptions.Timeout:
+                logging.error("Ollama: Request timed out.")
+                return "Ollama: Request timed out."
+            except requests.exceptions.HTTPError as http_err:
+                logging.error(f"Ollama: HTTP error occurred: {http_err}")
+                return f"Ollama: HTTP error occurred: {http_err}"
+            except requests.exceptions.RequestException as req_err:
+                logging.error(f"Ollama: Request exception: {req_err}")
+                return f"Ollama: Request exception: {req_err}"
+            except json.JSONDecodeError:
+                logging.error("Ollama: Failed to decode JSON response")
+                return "Ollama: Failed to decode JSON response."
+            except Exception as e:
+                logging.error(f"Ollama: An unexpected error occurred: {str(e)}")
+                return f"Ollama: An unexpected error occurred: {str(e)}"
+
+            logging.debug(f"API Response Data: {response_data}")

-        if response.status_code == 200:
-            # if 'X' in response_data:
-            logging.debug(response_data)
-            summary = response_data['content'].strip()
-            logging.debug("Ollama: Summarization successful")
-            print("Summarization successful.")
-            return summary
-        else:
-            logging.error(f"Ollama: API request failed with status code {response.status_code}: {response.text}")
-            return f"Ollama: API request failed: {response.text}"
+            if response.status_code == 200:
+                # Inspect available keys
+                available_keys = list(response_data.keys())
+                logging.debug(f"Ollama: Available keys in response: {available_keys}")
+
+                # Attempt to retrieve 'response'
+                summary = None
+                if 'response' in response_data and response_data['response']:
+                    summary = response_data['response'].strip()
+                elif 'choices' in response_data and len(response_data['choices']) > 0:
+                    choice = response_data['choices'][0]
+                    if 'message' in choice and 'content' in choice['message']:
+                        summary = choice['message']['content'].strip()
+
+                if summary:
+                    logging.debug("Ollama: Chat request successful")
+                    print("\n\nChat request successful.")
+                    return summary
+                elif response_data.get('done_reason') == 'load':
+                    logging.warning(f"Ollama: Model is loading. Attempt {attempt} of {max_retries}. Retrying in {retry_delay} seconds...")
+                    time.sleep(retry_delay)
+                else:
+                    logging.error("Ollama: API response does not contain 'response' or 'choices'.")
+                    return "Ollama: API response does not contain 'response' or 'choices'."
+            else:
+                logging.error(f"Ollama: API request failed with status code {response.status_code}: {response.text}")
+                return f"Ollama: API request failed: {response.text}"
+
+        logging.error("Ollama: Maximum retry attempts reached. Model is still loading.")
+        return "Ollama: Maximum retry attempts reached. Model is still loading."

     except Exception as e:
-        logging.error("Ollama: Error in processing: %s", str(e))
-        return f"Ollama: Error occurred while processing summary with ollama: {str(e)}"
+        logging.error("\n\nOllama: Error in processing: %s", str(e))
+        return f"Ollama: Error occurred while processing summary with Ollama: {str(e)}"


 # FIXME - update to be a summarize request
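Both rewritten functions now accept either response shape: Ollama's native {'response': ...} field, or the OpenAI-compatible {'choices': [{'message': {'content': ...}}]} served by the /v1/chat/completions route that the new default URL points at. A standalone helper sketching the same key-probing logic as the diff:

def extract_summary(response_data: dict):
    # Probe Ollama's native 'response' field first, then the
    # OpenAI-compatible 'choices' list; None means neither matched
    # (the caller decides whether to retry or give up).
    if response_data.get('response'):
        return response_data['response'].strip()
    choices = response_data.get('choices') or []
    if choices and 'content' in choices[0].get('message', {}):
        return choices[0]['message']['content'].strip()
    return None

# Works against both shapes:
print(extract_summary({'response': ' native shape '}))                        # native shape
print(extract_summary({'choices': [{'message': {'content': ' openai '}}]}))  # openai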
@@ -89,7 +89,7 @@ def summarize(
     elif api_name.lower() == "custom-openai":
         return summarize_with_custom_openai(api_key, input_data, custom_prompt_arg, temp, system_message)
     elif api_name.lower() == "ollama":
-        return summarize_with_ollama(input_data, custom_prompt_arg, api_key, temp, system_message)
+        return summarize_with_ollama(input_data, custom_prompt_arg, None, api_key, temp, system_message)
     else:
         return f"Error: Invalid API Name {api_name}"

