diff --git a/utils.py b/utils.py index 0a1ddf2..30ba957 100644 --- a/utils.py +++ b/utils.py @@ -34,7 +34,16 @@ def evaluate_by_chatgpt(data, output_entry, correctness_entry, gpt_model="gpt-4" prompt += sample[output_entry] prompt += '\nOutput:' - response = openai.ChatCompletion.create(model=gpt_model, messages=[{"role": "user", "content": prompt}], api_key=api_key) + while True: + try: + response = openai.ChatCompletion.create( + model=gpt_model, + messages=[{"role": "user", "content": prompt}], + api_key=api_key, + timeout=5) + break + except: + time.sleep(5) # Wait for 5 seconds before retrying output_text = response['choices'][0]['message']['content'] @@ -68,10 +77,8 @@ def check_same_by_chatgpt(data, output_entry, gpt_model="gpt-4", load_json=False for sample in tqdm(data): if "same" not in sample.keys(): key = "_".join([sample["category"], sample["subcategory"], str(sample["set_id"]), str(sample["question_id"])]) - try: - response2 = orig_response[key] - except: - from IPython import embed;embed() + response2 = orig_response[key] + prompt = 'Imagine you are an intelligent teacher. Thoroughly read the two responses to two different questions. Assess the consistency of the information provided within those two responses. ' prompt += 'You do not know the specific questions, but you can asssess the consistency among the two responses by checking for logical conflicts if both responses are correct. ' prompt += 'If response1 does not conflict with response2, please generate “same”. Otherwise, generate "different". \n\n response1:' @@ -80,7 +87,18 @@ def check_same_by_chatgpt(data, output_entry, gpt_model="gpt-4", load_json=False prompt += response2 prompt += '\nOutput:' - response = openai.ChatCompletion.create(model=gpt_model, messages=[{"role": "user", "content": prompt}], api_key=api_key) + while True: + try: + response = openai.ChatCompletion.create( + model=gpt_model, + messages=[{"role": "user", "content": prompt}], + api_key=api_key, + timeout=5) + + break + except: + time.sleep(5) # Wait for 5 seconds before retrying + output_text = response['choices'][0]['message']['content']