Skip to content

Commit

Permalink
Update arabic_evals.py: Fix custom arabic tasks [2nd attempt] (#444)
Browse files Browse the repository at this point in the history
Fix the alghafa prompt function by explicitly determining the list of choices based on task_name.
(Not all subsets of AlGhafa Native share the same columns.)

---------

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
  • Loading branch information
3 people authored Dec 20, 2024
1 parent a1c610d commit fbca143
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 14 deletions.
1 change: 0 additions & 1 deletion .github/workflows/trufflehog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,3 @@ jobs:
fetch-depth: 0
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main

7 changes: 2 additions & 5 deletions community_tasks/arabic_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def arabic_mmlu_pfn(line, task_name: str = None):
choices=valid_keys_arabic, # Return only valid choices (Arabic keys)
gold_index=answer_index, # Correct index in the valid Arabic keys
instruction=instruction,
target_for_fewshot_sorting=valid_keys_arabic[answer_index], # Correct answer in Arabic form
)


Expand Down Expand Up @@ -149,7 +148,6 @@ def arabic_mmlu_ht_pfn(line, task_name: str = None):
choices=[str(i) for i in range(1, len(choices) + 1)], # List of strings instead of ints
gold_index=answer_index,
instruction=instruction,
target_for_fewshot_sorting=str(answer_index), # Assuming it's sorted based on the number
)


Expand Down Expand Up @@ -328,7 +326,6 @@ def aratrust_pfn(line, task_name: str = None):
choices=LETTER_INDICES_AR[:3],
gold_index=answer_index,
instruction=instruction,
target_for_fewshot_sorting=LETTER_INDICES_AR[answer_index],
)


Expand Down Expand Up @@ -413,7 +410,8 @@ def arabic_exams_pfn(line, task_name: str = None):
def alghafa_pfn(line, task_name: str = None):
question = line["query"]
answer_index = int(line["label"])
choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]]
allowed_keys = [f"sol{i}" for i in range(1, 6)]
choices = [line[key] for key in allowed_keys if key in line]

instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
query = f"{instruction}السؤال: {question}\n"
Expand Down Expand Up @@ -802,7 +800,6 @@ def madinah_qa_pfn(line, task_name: str = None):
choices=choices,
gold_index=answer_index, # Correct index in the valid keys
instruction=instruction,
target_for_fewshot_sorting=valid_keys_latin[answer_index], # Correct answer in Latin form
)


Expand Down
1 change: 0 additions & 1 deletion docs/source/adding-a-new-metric.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,3 @@ if __name__ == "__main__":

You can then give your custom metric to lighteval by using `--custom-tasks
path_to_your_file` when launching it.

12 changes: 6 additions & 6 deletions docs/source/contributing-to-multilingual-evaluations.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ We welcome translations in your language!

To contribute, you'll need to
1. Open the [translation_literals](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/templates/utils/translation_literals.py) file
2. Edit the file to add or expand the literal for your language of interest.
2. Edit the file to add or expand the literal for your language of interest.

```python
Language.ENGLISH: TranslationLiterals(
Expand Down Expand Up @@ -42,7 +42,7 @@ To contribute, you'll need to

## Contributing a new multilingual task

You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.
You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.

Then, you should take a look at the current [multilingual tasks](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/multilingual/tasks.py) file, to understand how they are defined. For multilingual evaluations, the `prompt_function` should be implemented by a language-adapted template. The template will take care of correct formatting, as well as correct and consistent usage of language-adjusted prompt anchors (e.g. Question/Answer) and punctuation.

Expand All @@ -58,7 +58,7 @@ your_tasks = [
LightevalTaskConfig(
# Name of your evaluation
name=f"evalname_{language.value}_{formulation.name.lower()}",
# The evaluation is community contributed
# The evaluation is community contributed
suite=["community"],
# This will automatically get the correct metrics for your chosen formulation
metric=get_metrics_for_formulation(
Expand All @@ -72,7 +72,7 @@ your_tasks = [
# In this function, you choose which template to follow and for which language and formulation
prompt_function=get_template_prompt_function(
language=language,
# then use the adapter to define the mapping between the
# then use the adapter to define the mapping between the
# keys of the template (left), and the keys of your dataset
# (right)
# To know which template keys are required and available,
Expand All @@ -83,9 +83,9 @@ your_tasks = [
},
formulation=formulation,
),
# You can also add specific filters to remove irrelevant samples
# You can also add specific filters to remove irrelevant samples
hf_filter=lambda line: line["label"] in <condition>,
# You then select your huggingface dataset as well as
# You then select your huggingface dataset as well as
# the splits available for evaluation
hf_repo=<dataset>,
hf_subset=<subset>,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/using-the-python-api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def main():
env_config=EnvConfig(cache_dir="tmp/"),
# Remove the 2 parameters below once your configuration is tested
override_batch_size=1,
max_samples=10
max_samples=10
)

model_config = VLLMModelConfig(
Expand Down

0 comments on commit fbca143

Please sign in to comment.