call_llm.py
from google import genai
import os
import logging
import json
from datetime import datetime

# Configure logging
log_directory = os.getenv("LOG_DIR", "logs")
os.makedirs(log_directory, exist_ok=True)
log_file = os.path.join(log_directory, f"llm_calls_{datetime.now().strftime('%Y%m%d')}.log")

# Set up logger
logger = logging.getLogger("llm_logger")
logger.setLevel(logging.INFO)
logger.propagate = False  # Prevent propagation to root logger

file_handler = logging.FileHandler(log_file)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(file_handler)

# Cache configuration from environment variables
cache_file = os.getenv("CACHE_FILE", "llm_cache.json")
cache_enabled = os.getenv("CACHE_ENABLED", "true").lower() == "true"
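
# Note (inferred from call_llm below, not part of the original comments): the cache
# file is a flat JSON object mapping each prompt string to the model's response text,
# e.g. {"Hello, how are you?": "I'm doing well, thanks!"} (the example value is hypothetical).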

# By default, we use Google Gemini 2.5 Pro, as it shows great performance for code understanding
def call_llm(prompt: str, use_cache: bool = None) -> str:
    """Call the configured Gemini model with the given prompt, optionally using the on-disk cache."""
    # Determine if cache should be used (parameter overrides environment variable)
    if use_cache is None:
        use_cache = cache_enabled

    # Log the prompt
    logger.info(f"PROMPT: {prompt}")

    # Check cache if enabled
    if use_cache:
        # Load cache from disk
        cache = {}
        if os.path.exists(cache_file):
            try:
                with open(cache_file, 'r') as f:
                    cache = json.load(f)
            except Exception as e:
                logger.warning(f"Failed to load cache, starting with empty cache: {e}")

        # Return from cache if it exists
        if prompt in cache:
            logger.info(f"RESPONSE (cached): {cache[prompt]}")
            return cache[prompt]

    # Call the LLM if not in cache or cache disabled
    try:
        # Check if using API key or Vertex AI
        api_key = os.getenv("GEMINI_API_KEY")
        if api_key:
            # Use API key authentication
            client = genai.Client(api_key=api_key)
        else:
            # Use Vertex AI authentication
            client = genai.Client(
                vertexai=True,
                project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
                location=os.getenv("GEMINI_LOCATION", "us-central1")
            )

        model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
        response = client.models.generate_content(
            model=model,
            contents=[prompt]
        )
        response_text = response.text

        # Log the response
        logger.info(f"RESPONSE: {response_text}")

        # Update cache if enabled
        if use_cache:
            # Reload the cache from disk to avoid overwriting concurrent updates
            cache = {}
            if os.path.exists(cache_file):
                try:
                    with open(cache_file, 'r') as f:
                        cache = json.load(f)
                except Exception as e:
                    logger.warning(f"Failed to reload cache: {e}")

            # Add to cache and save
            cache[prompt] = response_text
            try:
                with open(cache_file, 'w') as f:
                    json.dump(cache, f)
            except Exception as e:
                logger.error(f"Failed to save cache: {e}")

        return response_text

    except Exception as e:
        logger.error(f"Error calling Gemini API: {e}")
        raise Exception(f"Failed to generate content with Gemini: {e}")

# # Use Anthropic Claude 3.7 Sonnet Extended Thinking
# def call_llm(prompt, use_cache: bool = True):
#     from anthropic import Anthropic
#     client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY", "your-api-key"))
#     response = client.messages.create(
#         model="claude-3-7-sonnet-20250219",
#         max_tokens=21000,
#         thinking={
#             "type": "enabled",
#             "budget_tokens": 20000
#         },
#         messages=[
#             {"role": "user", "content": prompt}
#         ]
#     )
#     return response.content[1].text

# # Use OpenAI o1
# def call_llm(prompt, use_cache: bool = True):
#     from openai import OpenAI
#     client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key"))
#     r = client.chat.completions.create(
#         model="o1",
#         messages=[{"role": "user", "content": prompt}],
#         response_format={
#             "type": "text"
#         },
#         reasoning_effort="medium",
#         store=False
#     )
#     return r.choices[0].message.content

if __name__ == "__main__":
    test_prompt = "Hello, how are you?"

    # First call - should hit the API
    print("Making call...")
    response1 = call_llm(test_prompt, use_cache=False)
    print(f"Response: {response1}")