diff --git a/browsecomp_eval.py b/browsecomp_eval.py
index e246d52f..1fbaebcf 100644
--- a/browsecomp_eval.py
+++ b/browsecomp_eval.py
@@ -89,7 +89,7 @@ def grade_sample(self, question: str, correct_answer: str, response: str) -> str
         grading_response = self.grader_model(prompt_messages)
 
         match = re.search(r"correct: (yes|no)", grading_response)
-        return match.group(0) if match else "no"  # Default to "no" if no match
+        return match.group(1) if match else "no"  # Default to "no" if no match
 
     def __call__(self, sampler: SamplerBase) -> EvalResult:
             def fn(row: dict):