fix: get first sme role

0xArdi committed Sep 12, 2023
1 parent a6ae0f5 commit 213d0e8
Showing 1 changed file with 28 additions and 20 deletions.
48 changes: 28 additions & 20 deletions tools/prediction_request_sme.py
@@ -28,7 +28,6 @@
from bs4 import BeautifulSoup
from googleapiclient.discovery import build


NUM_URLS_EXTRACT = 5
DEFAULT_OPENAI_SETTINGS = {
"max_tokens": 500,
@@ -111,7 +110,6 @@
* Output only the JSON object. Do not include any other contents in your response.
"""


SME_GENERATION_MARKET_PROMPT = """
task question: "{question}"
"""
@@ -150,7 +148,6 @@
"""



def search_google(query: str, api_key: str, engine: str, num: int = 3) -> List[str]:
service = build("customsearch", "v1", developerKey=api_key)
search = (
@@ -170,19 +167,19 @@ def get_urls_from_queries(queries: List[str], api_key: str, engine: str) -> List
results = []
for query in queries:
for url in search_google(
-query=query,
-api_key=api_key,
-engine=engine,
-num=3, # Number of returned results
+query=query,
+api_key=api_key,
+engine=engine,
+num=3, # Number of returned results
):
results.append(url)
unique_results = list(set(results))
return unique_results


def extract_text(
-html: str,
-num_words: int = 300, # TODO: summerise using GPT instead of limit
+html: str,
+num_words: int = 300, # TODO: summerise using GPT instead of limit
) -> str:
"""Extract text from a single HTML document"""
soup = BeautifulSoup(html, "html.parser")
@@ -196,15 +193,16 @@ def extract_text(


def process_in_batches(
-urls: List[str], window: int = 5, timeout: int = 10
+urls: List[str], window: int = 5, timeout: int = 10
) -> Generator[None, None, List[Tuple[Future, str]]]:
"""Iter URLs in batches."""
with ThreadPoolExecutor() as executor:
for i in range(0, len(urls), window):
-batch = urls[i : i + window]
+batch = urls[i: i + window]
futures = [(executor.submit(requests.get, url, timeout=timeout), url) for url in batch]
yield futures


def extract_texts(urls: List[str], num_words: int = 300) -> List[str]:
"""Extract texts from URLs"""
max_allowed = 5
@@ -225,19 +223,19 @@ def extract_texts(urls: List[str], num_words: int = 300) -> List[str]:
except requests.exceptions.ReadTimeout:
print(f"Request timed out: {url}.")
except Exception as e:
print(f"An error occurred: {e}")
print(f"An error occurred: {e}")
if stop:
break
return extracted_texts


def fetch_additional_information(
-prompt: str,
-engine: str,
-temperature: float,
-max_tokens: int,
-google_api_key: str,
-google_engine: str,
+prompt: str,
+engine: str,
+temperature: float,
+max_tokens: int,
+google_api_key: str,
+google_engine: str,
) -> str:
"""Fetch additional information."""
url_query_prompt = URL_QUERY_PROMPT.format(user_prompt=prompt)
@@ -288,8 +286,7 @@ def get_sme_role(engine, temperature, max_tokens, prompt) -> Tuple[str, str]:
stop=None,
)
generated_sme_roles = response.choices[0].message.content
-# check whether the generated_sme_roles is valid json
-sme = json.loads(generated_sme_roles)
+sme = json.loads(generated_sme_roles)[0]
return sme["sme"], sme["sme_introduction"]


@@ -353,3 +350,14 @@ def run(**kwargs) -> Tuple[str, Optional[Dict[str, Any]]]:
return response.choices[0].message.content, None


+prompt = {
+'prompt': '"Will the Disney Royal collections be available for purchase after the World Princess Week by 29 August 2023?" and the `yes` option represented by `Yes` and the `no` option represented by `No`, what are the respective probabilities of `p_yes` and `p_no` occurring?',
+'tool': 'prediction-online-sme', 'nonce': '3a32f8ad-efd1-43b9-adc7-967065321a10',
+"api_keys": {
+"openai": "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",
+"google_api_key": "AIzaSyDgX4gLJh5BUzBGIzUDpP_moqMLP6lAoEc",
+"google_engine_id": "23252845a8b224090",
+},
+}
+
+print(run(**prompt))
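
A minimal sketch of the parsing step this commit relies on, assuming the SME-generation prompt returns a JSON array of role objects; the payload below is hypothetical:

import json

# Hypothetical model output: a JSON array containing one SME role object,
# with the keys that get_sme_role() reads.
generated_sme_roles = (
    '[{"sme": "Theme Park Merchandise Analyst",'
    ' "sme_introduction": "You are an expert on Disney retail releases."}]'
)

sme = json.loads(generated_sme_roles)[0]  # take the first role from the array
print(sme["sme"], sme["sme_introduction"])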
