From 5120f5d944a2cb580ed3ea69e4d7bdd0c469be4e Mon Sep 17 00:00:00 2001
From: Steven Wang
Date: Thu, 16 Mar 2023 18:18:23 -0500
Subject: [PATCH] Add evals for probability questions

---
 .../probability_questions/probability_questions.jsonl | 3 +++
 evals/registry/evals/probability_questions.yaml       | 8 ++++++++
 2 files changed, 11 insertions(+)
 create mode 100644 evals/registry/data/probability_questions/probability_questions.jsonl
 create mode 100644 evals/registry/evals/probability_questions.yaml

diff --git a/evals/registry/data/probability_questions/probability_questions.jsonl b/evals/registry/data/probability_questions/probability_questions.jsonl
new file mode 100644
index 0000000000..67591e97ed
--- /dev/null
+++ b/evals/registry/data/probability_questions/probability_questions.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:586447e2dd41421446f8dc163babe17d47af4344b1f607d117b62827453acee3
+size 54726
diff --git a/evals/registry/evals/probability_questions.yaml b/evals/registry/evals/probability_questions.yaml
new file mode 100644
index 0000000000..32bcd574e1
--- /dev/null
+++ b/evals/registry/evals/probability_questions.yaml
@@ -0,0 +1,8 @@
+probability-questions:
+  id: probability-questions.dev.v0
+  description: A collection of probability questions that ChatGPT fails. Let's see if GPT-4 can do better.
+  metrics: [accuracy]
+probability-questions.dev.v0:
+  class: evals.elsuite.basic.match:Match
+  args:
+    samples_jsonl: probability_questions/probability_questions.jsonl
\ No newline at end of file