Commit e714999
Merge branch 'master' of github.com:assafelovic/gpt-researcher
assafelovic committed Oct 31, 2024
2 parents cf031ab + e1f0fc5 commit e714999
Showing 7 changed files with 85 additions and 50 deletions.
7 changes: 0 additions & 7 deletions Dockerfile
@@ -29,13 +29,6 @@ RUN pip install --no-cache-dir -r requirements.txt && \
 # Stage 3: Final stage with non-root user and app
 FROM gpt-researcher-install AS gpt-researcher
 
-# Use environment variables for API keys (defaults can be overridden at runtime)
-ARG OPENAI_API_KEY
-ARG TAVILY_API_KEY
-
-ENV OPENAI_API_KEY=${OPENAI_API_KEY}
-ENV TAVILY_API_KEY=${TAVILY_API_KEY}
-
 # Create a non-root user for security
 RUN useradd -ms /bin/bash gpt-researcher && \
     chown -R gpt-researcher:gpt-researcher /usr/src/app
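With the ARG/ENV defaults gone, API keys are no longer baked into the image at build time and must be supplied when the container starts. A minimal sketch of the runtime invocation (image name and key values are placeholders):

docker run \
  -e OPENAI_API_KEY=your-openai-key \
  -e TAVILY_API_KEY=your-tavily-key \
  gpt-researcher

This keeps secrets out of image layers and build caches, which is the usual motivation for dropping build-time ARGs for credentials.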
2 changes: 1 addition & 1 deletion gpt_researcher/llm_provider/generic/base.py
@@ -68,7 +68,7 @@ def from_provider(cls, provider: str, **kwargs: Any):
             llm = ChatFireworks(**kwargs)
         elif provider == "ollama":
             _check_pkg("langchain_community")
-            from langchain_community.chat_models import ChatOllama
+            from langchain_ollama import ChatOllama
 
             llm = ChatOllama(base_url=os.environ["OLLAMA_BASE_URL"], **kwargs)
         elif provider == "together":
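For reference, a minimal sketch of the new import path in use; the model name is illustrative, and OLLAMA_BASE_URL is assumed to point at a running Ollama server:

import os
from langchain_ollama import ChatOllama

# Assumes OLLAMA_BASE_URL is set, e.g. http://localhost:11434
llm = ChatOllama(base_url=os.environ["OLLAMA_BASE_URL"], model="llama3")
response = llm.invoke("Summarize the role of a retriever in one sentence.")
print(response.content)

The langchain-ollama package is the maintained home for these classes now that the langchain_community versions are deprecated.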
2 changes: 1 addition & 1 deletion gpt_researcher/memory/embeddings.py
@@ -17,7 +17,7 @@ def __init__(self, embedding_provider: str, model: str, **embdding_kwargs: Any):
         _embeddings = None
         match embedding_provider:
             case "ollama":
-                from langchain_community.embeddings import OllamaEmbeddings
+                from langchain_ollama import OllamaEmbeddings
 
                 _embeddings = OllamaEmbeddings(
                     model=model,
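Likewise, a short sketch of the embeddings class from its new package (the model name is illustrative and assumed to have been pulled into a local Ollama server):

from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url="http://localhost:11434")
vector = embeddings.embed_query("What is task decomposition?")
print(len(vector))  # dimensionality of the embedding vector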
84 changes: 58 additions & 26 deletions gpt_researcher/retrievers/searx/searx.py
@@ -1,45 +1,77 @@
 # Tavily API Retriever
 
 # libraries
 import os
-from langchain_community.utilities import SearxSearchWrapper
+import json
+import requests
+from typing import List, Dict
+from urllib.parse import urljoin
 
 
 class SearxSearch():
     """
-    Tavily API Retriever
+    SearxNG API Retriever
     """
-    def __init__(self, query):
+    def __init__(self, query: str):
         """
-        Initializes the TavilySearch object
+        Initializes the SearxSearch object
         Args:
-            query:
+            query: Search query string
         """
         self.query = query
-        self.api_key = self.get_api_key()
+        self.base_url = self.get_searxng_url()
 
-    def get_api_key(self):
+    def get_searxng_url(self) -> str:
         """
-        Gets the Tavily API key
+        Gets the SearxNG instance URL from environment variables
         Returns:
+            str: Base URL of SearxNG instance
         """
-        # Get the API key
         try:
-            api_key = os.environ["SEARX_URL"]
-        except:
-            raise Exception("Searx URL key not found. Please set the SEARX_URL environment variable. "
-                            "You can get your key from https://searx.space/")
-        return api_key
+            base_url = os.environ["SEARX_URL"]
+            if not base_url.endswith('/'):
+                base_url += '/'
+            return base_url
+        except KeyError:
+            raise Exception(
+                "SearxNG URL not found. Please set the SEARX_URL environment variable. "
+                "You can find public instances at https://searx.space/"
+            )
 
-    def search(self, max_results=7):
+    def search(self, max_results: int = 10) -> List[Dict[str, str]]:
         """
-        Searches the query
+        Searches the query using SearxNG API
+        Args:
+            max_results: Maximum number of results to return
         Returns:
+            List of dictionaries containing search results
         """
-        searx = SearxSearchWrapper(searx_host=os.environ["SEARX_URL"])
-        results = searx.results(self.query, max_results)
-        # Normalizing results to match the format of the other search APIs
-        search_response = [{"href": obj["link"], "body": obj["snippet"]} for obj in results]
-        return search_response
+        search_url = urljoin(self.base_url, "search")
+
+        params = {
+            # The search query.
+            'q': self.query,
+            # Output format of results. Format needs to be activated in searxng config.
+            'format': 'json'
+        }
+
+        try:
+            response = requests.get(
+                search_url,
+                params=params,
+                headers={'Accept': 'application/json'}
+            )
+            response.raise_for_status()
+            results = response.json()
+
+            # Normalize results to match the expected format
+            search_response = []
+            for result in results.get('results', [])[:max_results]:
+                search_response.append({
+                    "href": result.get('url', ''),
+                    "body": result.get('content', '')
+                })
+
+            return search_response
+
+        except requests.exceptions.RequestException as e:
+            raise Exception(f"Error querying SearxNG: {str(e)}")
+        except json.JSONDecodeError:
+            raise Exception("Error parsing SearxNG response")
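A quick usage sketch of the rewritten retriever. The instance URL is a placeholder, the import path is inferred from the file location above, and the SearxNG instance is assumed to have the JSON output format enabled in its settings:

import os
os.environ["SEARX_URL"] = "https://searx.example.org"  # placeholder instance

from gpt_researcher.retrievers.searx.searx import SearxSearch

retriever = SearxSearch("open source research agents")
for result in retriever.search(max_results=5):
    print(result["href"], "-", result["body"][:80])

Note that SEARX_URL must be set before SearxSearch is constructed, since get_searxng_url() reads it in __init__.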
2 changes: 1 addition & 1 deletion gpt_researcher/scraper/arxiv/arxiv.py
@@ -18,5 +18,5 @@ def scrape(self):
         """
         query = self.link.split("/")[-1]
         retriever = ArxivRetriever(load_max_docs=2, doc_content_chars_max=None)
-        docs = retriever.get_relevant_documents(query=query)
+        docs = retriever.invoke(query=query)
         return docs[0].page_content
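get_relevant_documents is deprecated in recent LangChain releases in favor of the Runnable-style invoke. A standalone sketch of the new call (the arXiv ID is illustrative):

from langchain_community.retrievers import ArxivRetriever

retriever = ArxivRetriever(load_max_docs=2, doc_content_chars_max=None)
docs = retriever.invoke("2310.06825")  # the ID taken from the end of an arxiv.org link
print(docs[0].page_content[:200])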
1 change: 1 addition & 0 deletions requirements.txt
@@ -12,6 +12,7 @@ markdown
 langchain
 langchain_community
 langchain-openai
+langchain-ollama
 langgraph
 tiktoken
 gpt-researcher
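For an already-provisioned environment, the new dependency can be pulled in directly rather than reinstalling everything:

pip install langchain-ollama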
37 changes: 23 additions & 14 deletions tests/gptr-logs-handler.py
@@ -1,27 +1,35 @@
-from typing import Dict, Any
+import logging
+from typing import List, Dict, Any
 import asyncio
 from gpt_researcher import GPTResearcher
 
 class CustomLogsHandler:
     """A custom Logs handler class to handle JSON data."""
     def __init__(self):
-        self.logs = []  # Initialize logs to store data
+        self.logs: List[Dict[str, Any]] = []  # Initialize logs to store data
+        logging.basicConfig(level=logging.INFO)  # Set up logging configuration
 
     async def send_json(self, data: Dict[str, Any]) -> None:
-        """Send JSON data and log it."""
-        self.logs.append(data)  # Append data to logs
-        print(f"My custom Log: {data}")  # For demonstration, print the log
-
-async def run():
-    # Define the necessary parameters with sample values
-
+        """Send JSON data and log it, with error handling."""
+        try:
+            self.logs.append(data)  # Append data to logs
+            logging.info(f"My custom Log: {data}")  # Use logging instead of print
+        except Exception as e:
+            logging.error(f"Error logging data: {e}")  # Log any errors
+
+    def clear_logs(self) -> None:
+        """Clear the logs."""
+        self.logs.clear()  # Clear the logs list
+        logging.info("Logs cleared.")  # Log the clearing action
+
+async def run() -> None:
+    """Run the research process and generate a report."""
     query = "What happened in the latest burning man floods?"
-    report_type = "research_report"  # Type of report to generate
-    report_source = "online"  # Could specify source like 'online', 'books', etc.
-    tone = "informative"  # Tone of the report ('informative', 'casual', etc.)
-    config_path = None  # Path to a config file, if needed
+    report_type = "research_report"
+    report_source = "online"
+    tone = "informative"
+    config_path = None
 
     # Initialize researcher with a custom WebSocket
     custom_logs_handler = CustomLogsHandler()
 
     researcher = GPTResearcher(
@@ -35,6 +43,7 @@ async def run():
 
     await researcher.conduct_research()  # Conduct the research
     report = await researcher.write_report()  # Write the research report
+    logging.info("Report generated successfully.")  # Log report generation
 
     return report
 
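The diff stops before the end of the file; a typical entry point for running this test script directly, assuming run() returns the report as shown above, would be:

if __name__ == "__main__":
    report = asyncio.run(run())
    print(report)

asyncio is already imported at the top of the file, so no further setup is needed.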
