"""Generate the per-subtask ``config.jsonnet`` files of the mSCAN GenBench task.

One config is rendered for every (language, split) pair and written to
``<task-dir>/<language>_<split>/config.jsonnet``.  The subtask directories must
already exist; this script only fills in their config files.
"""

import argparse
from pathlib import Path

# Default output root, kept from the original script.
# NOTE(review): the subtask directories committed alongside this script live
# under ``src/genbench/tasks/multilingual_scan``; pass ``--task-dir`` (or the
# ``task_dir`` argument of ``write_configs``) to target that layout.
DEFAULT_TASK_DIR = "src/genbench/tasks/mscan"

languages = ["cmn", "eng", "fra", "hin", "rus"]

# Split name -> options.  ``has_dev`` marks splits for which a ``dev.jsonl``
# URL is emitted as the validation set.
splits = {
    "add_prim_jump": {"has_dev": False},
    "add_prim_turn_left": {"has_dev": False},
    "length": {"has_dev": False},
    "mcd1": {"has_dev": True},
    "mcd2": {"has_dev": True},
    "mcd3": {"has_dev": True},
    "simple": {"has_dev": False},
}


def get_jsonnet(language, split, has_dev):
    """Render the jsonnet config text for one mSCAN subtask.

    Args:
        language: Language code used in the task name and the data URLs.
        split: Split name used in the task name and the data URLs.
        has_dev: When true, a ``validation`` data source pointing at
            ``dev.jsonl`` is emitted and ``has_validation_set`` is ``true``.

    Returns:
        The config as a string, without a trailing newline.
    """
    base_url = f"https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/{language}/{split}"
    # Left empty (yielding a whitespace-only line) for splits without a dev
    # set, which is what the original template produced as well.
    validation_entry = f"validation: '{base_url}/dev.jsonl'," if has_dev else ""
    has_validation = "true" if has_dev else "false"

    return f"""{{
    name: 'mSCAN (language: {language}, split: {split})',

    // @TODO: Add a description of the task
    description: 'Multilingual SCAN ({language}) aims to measure ...',

    // @TODO: Add a list of keywords that describe the task
    keywords: [
        'keyword1',
        'keyword2',
    ],

    authors: [
        'Amélie Reymond',
        'Shane Steinert-Threlkeld',
    ],

    data_source: {{
        type: 'manual',
        train: '{base_url}/train.jsonl',
        test: '{base_url}/test.jsonl',
        {validation_entry}
    }},

    has_validation_set: {has_validation},
    has_train_set: true,

    task_type: 'free_form',

    preparation_strategies: {{
        finetuning: {{
            objective: 'maximum_likelihood',
        }},

        prompt_based_testing: {{
            prompt_builder: {{
                instruction_zero_shot: '',
                input_prefix: 'IN: ',
                output_prefix: 'OUT: ',
                append_choices_to_input: false,
                few_shot_example_separator: '\\n\\n',
                stop_string: '\\n\\n',
            }}
        }},

    }},

    evaluation_metrics: [
        {{
            hf_id: 'exact_match',
            git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742",
            best_score: 1.0,
        }}
    ]
}}"""


def write_configs(task_dir=DEFAULT_TASK_DIR):
    """Write ``config.jsonnet`` for every (language, split) pair.

    Args:
        task_dir: Directory that contains the ``<language>_<split>`` subtask
            directories.

    Returns:
        The list of written config paths, in generation order.

    Raises:
        FileNotFoundError: If any subtask directory is missing.  All
            directories are checked before the first write so a failure never
            leaves a partially regenerated tree.
    """
    root = Path(task_dir)
    pending = []
    for language in languages:
        for split_name, split_args in splits.items():
            subtask_dir = root / f"{language}_{split_name}"
            if not subtask_dir.is_dir():
                raise FileNotFoundError(f"Subtask directory not found: {subtask_dir}")
            content = get_jsonnet(language, split_name, split_args["has_dev"])
            pending.append((subtask_dir / "config.jsonnet", content))

    for config_path, content in pending:
        # Explicit UTF-8: the template contains non-ASCII author names, and
        # the platform default encoding is not guaranteed to handle them.
        with open(config_path, "w", encoding="utf-8") as f:
            f.write(content)

    return [config_path for config_path, _ in pending]


def main(argv=None):
    """Parse command-line options and regenerate all subtask configs."""
    parser = argparse.ArgumentParser(
        description="Generate the config.jsonnet files of the mSCAN subtasks."
    )
    parser.add_argument(
        "--task-dir",
        default=DEFAULT_TASK_DIR,
        help="directory containing the <language>_<split> subtask directories "
        "(default: %(default)s)",
    )
    args = parser.parse_args(argv)
    write_configs(args.task_dir)


if __name__ == "__main__":
    main()
a/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/config.jsonnet b/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/config.jsonnet new file mode 100644 index 0000000..60f1bb3 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: cmn, split: add_prim_jump)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (cmn) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/add_prim_jump/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/add_prim_jump/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/doc.md b/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/doc.md new file mode 100644 index 0000000..e0e8dc5 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/doc.md @@ -0,0 +1,19 @@ +# mSCAN (cmn_add_prim_jump) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (cmn_add_prim_jump).* + +## Examples +*Give some examples of the mSCAN (cmn_add_prim_jump).* + +## Usage +*Describe how to load your task and 
class MscanCmnAddPrimJump(Task):
    """mSCAN subtask for language ``cmn``, split ``add_prim_jump``.

    Defines nothing of its own; all behaviour is inherited from ``Task``.
    """
'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/doc.md b/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/doc.md new file mode 100644 index 0000000..fa4e687 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/doc.md @@ -0,0 +1,19 @@ +# mSCAN (cmn_add_prim_turn_left) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (cmn_add_prim_turn_left).* + +## Examples +*Give some examples of the mSCAN (cmn_add_prim_turn_left).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (cmn_add_prim_turn_left).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (cmn_add_prim_turn_left) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
class MscanCmnAddPrimTurnLeft(Task):
    """mSCAN subtask for language ``cmn``, split ``add_prim_turn_left``.

    Defines nothing of its own; all behaviour is inherited from ``Task``.
    """
class MscanCmnLength(Task):
    """mSCAN subtask for language ``cmn``, split ``length``.

    Defines nothing of its own; all behaviour is inherited from ``Task``.
    """
Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/mcd1/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/mcd1/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/mcd1/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/cmn_mcd1/doc.md b/src/genbench/tasks/multilingual_scan/cmn_mcd1/doc.md new file mode 100644 index 0000000..938963f --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/cmn_mcd1/doc.md @@ -0,0 +1,19 @@ +# mSCAN (cmn_mcd1) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (cmn_mcd1).* + +## Examples +*Give some examples of the mSCAN (cmn_mcd1).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (cmn_mcd1).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (cmn_mcd1) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
class MscanCmnMcd1(Task):
    """mSCAN subtask for language ``cmn``, split ``mcd1``.

    Defines nothing of its own; all behaviour is inherited from ``Task``.
    """
class MscanCmnMcd2(Task):
    """mSCAN subtask for language ``cmn``, split ``mcd2``.

    Defines nothing of its own; all behaviour is inherited from ``Task``.
    """
Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/mcd3/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/mcd3/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/mcd3/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/cmn_mcd3/doc.md b/src/genbench/tasks/multilingual_scan/cmn_mcd3/doc.md new file mode 100644 index 0000000..4563e0e --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/cmn_mcd3/doc.md @@ -0,0 +1,19 @@ +# mSCAN (cmn_mcd3) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (cmn_mcd3).* + +## Examples +*Give some examples of the mSCAN (cmn_mcd3).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (cmn_mcd3).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (cmn_mcd3) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
class MscanCmnMcd3(Task):
    """mSCAN subtask for language ``cmn``, split ``mcd3``.

    Defines nothing of its own; all behaviour is inherited from ``Task``.
    """
class MscanCmnSimple(Task):
    """mSCAN subtask for language ``cmn``, split ``simple``.

    Defines nothing of its own; all behaviour is inherited from ``Task``.
    """
'eng_simple', + 'fra_add_prim_jump', + 'fra_add_prim_turn_left', + 'fra_length', + 'fra_mcd1', + 'fra_mcd2', + 'fra_mcd3', + 'fra_simple', + 'hin_add_prim_jump', + 'hin_add_prim_turn_left', + 'hin_length', + 'hin_mcd1', + 'hin_mcd2', + 'hin_mcd3', + 'hin_simple', + 'rus_add_prim_jump', + 'rus_add_prim_turn_left', + 'rus_length', + 'rus_mcd1', + 'rus_mcd2', + 'rus_mcd3', + 'rus_simple', + + ], +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/doc.md b/src/genbench/tasks/multilingual_scan/doc.md new file mode 100644 index 0000000..bb787be --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/doc.md @@ -0,0 +1,37 @@ +## Motivation +It is widely acknowledged that fine-tuning a pretrained model generally results in better performance on a given task, compared to training the model from scratch. Evidence suggests that this is also the case for compositional generalization tasks. However, it has also been shown that multilingual models may not exhibit consistent performance across languages, with low-resource languages often doing worse. Can we expect similar variations between languages when testing a multilingual model for compositionality? + +The majority of research on compositional generalisation has focussed on English data and models. With the ambition to gain a deeper understanding of this issue from a multilingual perspective, we aim to adapt SCAN, an existing compositionality benchmark, into multiple languages, in order to evaluate multilingual LLMs for compositional generalization. + + +## Examples +We have five subtasks, each corresponding to one of five languages: French (fra), Hindi (hin), Russian (rus), Turkish (tur), and Mandarin Chinese (cmn). + +Each subtask consists of the SCAN dataset adapted into one of the aforementioned languages. 
+ +Example: + +### SCAN example + +``` +IN: walk opposite right thrice after run opposite right +OUT: I_TURN_RIGHT I_TURN_RIGHT I_RUN I_TURN_RIGHT I_TURN_RIGHT I_WALK I_TURN_RIGHT I_TURN_RIGHT I_WALK I_TURN_RIGHT I_TURN_RIGHT I_WALK +``` + +### FRA-SCAN example + +``` +IN: marcher à l'envers par la droite trois fois après courir à l'envers par la droite +OUT: I_TURN_RIGHT I_TURN_RIGHT I_RUN I_TURN_RIGHT I_TURN_RIGHT I_WALK I_TURN_RIGHT I_TURN_RIGHT I_WALK I_TURN_RIGHT I_TURN_RIGHT I_WALK +``` + +## Data Source +To generate the data, native speakers of the five selected languages have been asked to manually translate the vocabulary of the original SCAN dataset as well as either the equivalent interpretation function or grammar in their own language. + +## Limitations and Bias +*Note any known limitations or biases that the Multilingual SCAN has, with links and references if possible.* + +## Citation +** +## Further References +Original SCAN benchmark paper (Lake & Baroni, 2018): https://arxiv.org/abs/1711.00350 diff --git a/src/genbench/tasks/multilingual_scan/eng_add_prim_jump/__init__.py b/src/genbench/tasks/multilingual_scan/eng_add_prim_jump/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/eng_add_prim_jump/config.jsonnet b/src/genbench/tasks/multilingual_scan/eng_add_prim_jump/config.jsonnet new file mode 100644 index 0000000..d7254af --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_add_prim_jump/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: eng, split: add_prim_jump)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (eng) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 
'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/add_prim_jump/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/add_prim_jump/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/eng_add_prim_jump/doc.md b/src/genbench/tasks/multilingual_scan/eng_add_prim_jump/doc.md new file mode 100644 index 0000000..fa998b1 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_add_prim_jump/doc.md @@ -0,0 +1,19 @@ +# mSCAN (eng_add_prim_jump) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (eng_add_prim_jump).* + +## Examples +*Give some examples of the mSCAN (eng_add_prim_jump).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (eng_add_prim_jump).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (eng_add_prim_jump) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
class MscanEngAddPrimJump(Task):
    """mSCAN subtask for language ``eng``, split ``add_prim_jump``.

    Defines nothing of its own; all behaviour is inherited from ``Task``.
    """
best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/eng_add_prim_turn_left/doc.md b/src/genbench/tasks/multilingual_scan/eng_add_prim_turn_left/doc.md new file mode 100644 index 0000000..62ff5da --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_add_prim_turn_left/doc.md @@ -0,0 +1,19 @@ +# mSCAN (eng_add_prim_turn_left) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (eng_add_prim_turn_left).* + +## Examples +*Give some examples of the mSCAN (eng_add_prim_turn_left).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (eng_add_prim_turn_left).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (eng_add_prim_turn_left) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/eng_add_prim_turn_left/task.py b/src/genbench/tasks/multilingual_scan/eng_add_prim_turn_left/task.py new file mode 100644 index 0000000..a3e22f4 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_add_prim_turn_left/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanEngAddPrimTurnLeft(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/eng_length/__init__.py b/src/genbench/tasks/multilingual_scan/eng_length/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/eng_length/config.jsonnet b/src/genbench/tasks/multilingual_scan/eng_length/config.jsonnet new file mode 100644 index 0000000..304c30e --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_length/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: eng, split: length)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (eng) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/length/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/length/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git 
a/src/genbench/tasks/multilingual_scan/eng_length/doc.md b/src/genbench/tasks/multilingual_scan/eng_length/doc.md new file mode 100644 index 0000000..e7216e7 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_length/doc.md @@ -0,0 +1,19 @@ +# mSCAN (eng_length) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (eng_length).* + +## Examples +*Give some examples of the mSCAN (eng_length).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (eng_length).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (eng_length) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/eng_length/task.py b/src/genbench/tasks/multilingual_scan/eng_length/task.py new file mode 100644 index 0000000..b8d5b2a --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_length/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanEngLength(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd1/__init__.py b/src/genbench/tasks/multilingual_scan/eng_mcd1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd1/config.jsonnet b/src/genbench/tasks/multilingual_scan/eng_mcd1/config.jsonnet new file mode 100644 index 0000000..70ffd6e --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_mcd1/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: eng, split: mcd1)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (eng) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie 
Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/mcd1/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/mcd1/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/mcd1/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd1/doc.md b/src/genbench/tasks/multilingual_scan/eng_mcd1/doc.md new file mode 100644 index 0000000..d5858a9 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_mcd1/doc.md @@ -0,0 +1,19 @@ +# mSCAN (eng_mcd1) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (eng_mcd1).* + +## Examples +*Give some examples of the mSCAN (eng_mcd1).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (eng_mcd1).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (eng_mcd1) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd1/task.py b/src/genbench/tasks/multilingual_scan/eng_mcd1/task.py new file mode 100644 index 0000000..d7f0b82 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_mcd1/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanEngMcd1(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd2/__init__.py b/src/genbench/tasks/multilingual_scan/eng_mcd2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd2/config.jsonnet b/src/genbench/tasks/multilingual_scan/eng_mcd2/config.jsonnet new file mode 100644 index 0000000..967966f --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_mcd2/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: eng, split: mcd2)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (eng) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/mcd2/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/mcd2/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/mcd2/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file 
diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd2/doc.md b/src/genbench/tasks/multilingual_scan/eng_mcd2/doc.md new file mode 100644 index 0000000..aa34290 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_mcd2/doc.md @@ -0,0 +1,19 @@ +# mSCAN (eng_mcd2) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (eng_mcd2).* + +## Examples +*Give some examples of the mSCAN (eng_mcd2).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (eng_mcd2).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (eng_mcd2) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd2/task.py b/src/genbench/tasks/multilingual_scan/eng_mcd2/task.py new file mode 100644 index 0000000..bf38658 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_mcd2/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanEngMcd2(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd3/__init__.py b/src/genbench/tasks/multilingual_scan/eng_mcd3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd3/config.jsonnet b/src/genbench/tasks/multilingual_scan/eng_mcd3/config.jsonnet new file mode 100644 index 0000000..7469dfe --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_mcd3/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: eng, split: mcd3)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (eng) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane 
Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/mcd3/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/mcd3/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/mcd3/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd3/doc.md b/src/genbench/tasks/multilingual_scan/eng_mcd3/doc.md new file mode 100644 index 0000000..84c2207 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_mcd3/doc.md @@ -0,0 +1,19 @@ +# mSCAN (eng_mcd3) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (eng_mcd3).* + +## Examples +*Give some examples of the mSCAN (eng_mcd3).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (eng_mcd3).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (eng_mcd3) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/eng_mcd3/task.py b/src/genbench/tasks/multilingual_scan/eng_mcd3/task.py new file mode 100644 index 0000000..7204d95 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_mcd3/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanEngMcd3(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/eng_simple/__init__.py b/src/genbench/tasks/multilingual_scan/eng_simple/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/eng_simple/config.jsonnet b/src/genbench/tasks/multilingual_scan/eng_simple/config.jsonnet new file mode 100644 index 0000000..261e6a4 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_simple/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: eng, split: simple)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (eng) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/simple/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/eng/simple/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/eng_simple/doc.md 
b/src/genbench/tasks/multilingual_scan/eng_simple/doc.md new file mode 100644 index 0000000..3e26b46 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_simple/doc.md @@ -0,0 +1,19 @@ +# mSCAN (eng_simple) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (eng_simple).* + +## Examples +*Give some examples of the mSCAN (eng_simple).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (eng_simple).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (eng_simple) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/eng_simple/task.py b/src/genbench/tasks/multilingual_scan/eng_simple/task.py new file mode 100644 index 0000000..68d3507 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/eng_simple/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanEngSimple(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/__init__.py b/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/config.jsonnet b/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/config.jsonnet new file mode 100644 index 0000000..c941e07 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: fra, split: add_prim_jump)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (fra) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', 
+ 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/add_prim_jump/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/add_prim_jump/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/doc.md b/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/doc.md new file mode 100644 index 0000000..ee0e7f0 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/doc.md @@ -0,0 +1,19 @@ +# mSCAN (fra_add_prim_jump) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (fra_add_prim_jump).* + +## Examples +*Give some examples of the mSCAN (fra_add_prim_jump).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (fra_add_prim_jump).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (fra_add_prim_jump) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/task.py b/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/task.py new file mode 100644 index 0000000..758a4cd --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_add_prim_jump/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanFraAddPrimJump(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/__init__.py b/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/config.jsonnet b/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/config.jsonnet new file mode 100644 index 0000000..be80b72 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: fra, split: add_prim_turn_left)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (fra) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/add_prim_turn_left/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/add_prim_turn_left/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + 
best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/doc.md b/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/doc.md new file mode 100644 index 0000000..12d0753 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/doc.md @@ -0,0 +1,19 @@ +# mSCAN (fra_add_prim_turn_left) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (fra_add_prim_turn_left).* + +## Examples +*Give some examples of the mSCAN (fra_add_prim_turn_left).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (fra_add_prim_turn_left).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (fra_add_prim_turn_left) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/task.py b/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/task.py new file mode 100644 index 0000000..038c5a6 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_add_prim_turn_left/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanFraAddPrimTurnLeft(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/fra_length/__init__.py b/src/genbench/tasks/multilingual_scan/fra_length/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/fra_length/config.jsonnet b/src/genbench/tasks/multilingual_scan/fra_length/config.jsonnet new file mode 100644 index 0000000..fdc0563 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_length/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: fra, split: length)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (fra) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/length/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/length/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git 
a/src/genbench/tasks/multilingual_scan/fra_length/doc.md b/src/genbench/tasks/multilingual_scan/fra_length/doc.md new file mode 100644 index 0000000..bf5cb16 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_length/doc.md @@ -0,0 +1,19 @@ +# mSCAN (fra_length) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (fra_length).* + +## Examples +*Give some examples of the mSCAN (fra_length).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (fra_length).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (fra_length) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/fra_length/task.py b/src/genbench/tasks/multilingual_scan/fra_length/task.py new file mode 100644 index 0000000..448d083 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_length/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanFraLength(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd1/__init__.py b/src/genbench/tasks/multilingual_scan/fra_mcd1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd1/config.jsonnet b/src/genbench/tasks/multilingual_scan/fra_mcd1/config.jsonnet new file mode 100644 index 0000000..0b92ef5 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_mcd1/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: fra, split: mcd1)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (fra) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie 
Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/mcd1/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/mcd1/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/mcd1/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd1/doc.md b/src/genbench/tasks/multilingual_scan/fra_mcd1/doc.md new file mode 100644 index 0000000..4a6dadb --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_mcd1/doc.md @@ -0,0 +1,19 @@ +# mSCAN (fra_mcd1) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (fra_mcd1).* + +## Examples +*Give some examples of the mSCAN (fra_mcd1).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (fra_mcd1).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (fra_mcd1) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd1/task.py b/src/genbench/tasks/multilingual_scan/fra_mcd1/task.py new file mode 100644 index 0000000..afad2ed --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_mcd1/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanFraMcd1(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd2/__init__.py b/src/genbench/tasks/multilingual_scan/fra_mcd2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd2/config.jsonnet b/src/genbench/tasks/multilingual_scan/fra_mcd2/config.jsonnet new file mode 100644 index 0000000..27bff0e --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_mcd2/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: fra, split: mcd2)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (fra) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/mcd2/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/mcd2/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/mcd2/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file 
diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd2/doc.md b/src/genbench/tasks/multilingual_scan/fra_mcd2/doc.md new file mode 100644 index 0000000..fadde5e --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_mcd2/doc.md @@ -0,0 +1,19 @@ +# mSCAN (fra_mcd2) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (fra_mcd2).* + +## Examples +*Give some examples of the mSCAN (fra_mcd2).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (fra_mcd2).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (fra_mcd2) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd2/task.py b/src/genbench/tasks/multilingual_scan/fra_mcd2/task.py new file mode 100644 index 0000000..6e0301e --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_mcd2/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanFraMcd2(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd3/__init__.py b/src/genbench/tasks/multilingual_scan/fra_mcd3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd3/config.jsonnet b/src/genbench/tasks/multilingual_scan/fra_mcd3/config.jsonnet new file mode 100644 index 0000000..6084da4 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_mcd3/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: fra, split: mcd3)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (fra) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane 
Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/mcd3/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/mcd3/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/mcd3/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd3/doc.md b/src/genbench/tasks/multilingual_scan/fra_mcd3/doc.md new file mode 100644 index 0000000..7e32d52 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_mcd3/doc.md @@ -0,0 +1,19 @@ +# mSCAN (fra_mcd3) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (fra_mcd3).* + +## Examples +*Give some examples of the mSCAN (fra_mcd3).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (fra_mcd3).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (fra_mcd3) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/fra_mcd3/task.py b/src/genbench/tasks/multilingual_scan/fra_mcd3/task.py new file mode 100644 index 0000000..0e548a4 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_mcd3/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanFraMcd3(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/fra_simple/__init__.py b/src/genbench/tasks/multilingual_scan/fra_simple/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/fra_simple/config.jsonnet b/src/genbench/tasks/multilingual_scan/fra_simple/config.jsonnet new file mode 100644 index 0000000..a9baeeb --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_simple/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: fra, split: simple)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (fra) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/simple/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/fra/simple/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/fra_simple/doc.md 
b/src/genbench/tasks/multilingual_scan/fra_simple/doc.md new file mode 100644 index 0000000..2a78947 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_simple/doc.md @@ -0,0 +1,19 @@ +# mSCAN (fra_simple) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (fra_simple).* + +## Examples +*Give some examples of the mSCAN (fra_simple).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (fra_simple).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (fra_simple) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/fra_simple/task.py b/src/genbench/tasks/multilingual_scan/fra_simple/task.py new file mode 100644 index 0000000..af865b5 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/fra_simple/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanFraSimple(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/__init__.py b/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/config.jsonnet b/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/config.jsonnet new file mode 100644 index 0000000..34859fa --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: hin, split: add_prim_jump)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (hin) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', 
+ 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/add_prim_jump/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/add_prim_jump/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/doc.md b/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/doc.md new file mode 100644 index 0000000..d80ee89 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/doc.md @@ -0,0 +1,19 @@ +# mSCAN (hin_add_prim_jump) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (hin_add_prim_jump).* + +## Examples +*Give some examples of the mSCAN (hin_add_prim_jump).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (hin_add_prim_jump).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (hin_add_prim_jump) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/task.py b/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/task.py new file mode 100644 index 0000000..843b5aa --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_add_prim_jump/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanHinAddPrimJump(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/__init__.py b/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/config.jsonnet b/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/config.jsonnet new file mode 100644 index 0000000..2c0d571 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: hin, split: add_prim_turn_left)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (hin) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/add_prim_turn_left/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/add_prim_turn_left/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + 
best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/doc.md b/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/doc.md new file mode 100644 index 0000000..82d9539 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/doc.md @@ -0,0 +1,19 @@ +# mSCAN (hin_add_prim_turn_left) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (hin_add_prim_turn_left).* + +## Examples +*Give some examples of the mSCAN (hin_add_prim_turn_left).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (hin_add_prim_turn_left).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (hin_add_prim_turn_left) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/task.py b/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/task.py new file mode 100644 index 0000000..3d3354a --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_add_prim_turn_left/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanHinAddPrimTurnLeft(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/hin_length/__init__.py b/src/genbench/tasks/multilingual_scan/hin_length/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/hin_length/config.jsonnet b/src/genbench/tasks/multilingual_scan/hin_length/config.jsonnet new file mode 100644 index 0000000..1dd5b89 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_length/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: hin, split: length)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (hin) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/length/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/length/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git 
a/src/genbench/tasks/multilingual_scan/hin_length/doc.md b/src/genbench/tasks/multilingual_scan/hin_length/doc.md new file mode 100644 index 0000000..b98cb9d --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_length/doc.md @@ -0,0 +1,19 @@ +# mSCAN (hin_length) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (hin_length).* + +## Examples +*Give some examples of the mSCAN (hin_length).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (hin_length).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (hin_length) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/hin_length/task.py b/src/genbench/tasks/multilingual_scan/hin_length/task.py new file mode 100644 index 0000000..86aa014 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_length/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanHinLength(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd1/__init__.py b/src/genbench/tasks/multilingual_scan/hin_mcd1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd1/config.jsonnet b/src/genbench/tasks/multilingual_scan/hin_mcd1/config.jsonnet new file mode 100644 index 0000000..57373cb --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_mcd1/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: hin, split: mcd1)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (hin) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie 
Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/mcd1/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/mcd1/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/mcd1/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd1/doc.md b/src/genbench/tasks/multilingual_scan/hin_mcd1/doc.md new file mode 100644 index 0000000..c5c7420 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_mcd1/doc.md @@ -0,0 +1,19 @@ +# mSCAN (hin_mcd1) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (hin_mcd1).* + +## Examples +*Give some examples of the mSCAN (hin_mcd1).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (hin_mcd1).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (hin_mcd1) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd1/task.py b/src/genbench/tasks/multilingual_scan/hin_mcd1/task.py new file mode 100644 index 0000000..74d9b29 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_mcd1/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanHinMcd1(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd2/__init__.py b/src/genbench/tasks/multilingual_scan/hin_mcd2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd2/config.jsonnet b/src/genbench/tasks/multilingual_scan/hin_mcd2/config.jsonnet new file mode 100644 index 0000000..27ff3dd --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_mcd2/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: hin, split: mcd2)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (hin) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/mcd2/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/mcd2/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/mcd2/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file 
diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd2/doc.md b/src/genbench/tasks/multilingual_scan/hin_mcd2/doc.md new file mode 100644 index 0000000..60065d1 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_mcd2/doc.md @@ -0,0 +1,19 @@ +# mSCAN (hin_mcd2) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (hin_mcd2).* + +## Examples +*Give some examples of the mSCAN (hin_mcd2).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (hin_mcd2).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (hin_mcd2) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd2/task.py b/src/genbench/tasks/multilingual_scan/hin_mcd2/task.py new file mode 100644 index 0000000..e08a1cb --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_mcd2/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanHinMcd2(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd3/__init__.py b/src/genbench/tasks/multilingual_scan/hin_mcd3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd3/config.jsonnet b/src/genbench/tasks/multilingual_scan/hin_mcd3/config.jsonnet new file mode 100644 index 0000000..7fc968d --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_mcd3/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: hin, split: mcd3)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (hin) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane 
Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/mcd3/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/mcd3/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/mcd3/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd3/doc.md b/src/genbench/tasks/multilingual_scan/hin_mcd3/doc.md new file mode 100644 index 0000000..ec9c069 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_mcd3/doc.md @@ -0,0 +1,19 @@ +# mSCAN (hin_mcd3) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (hin_mcd3).* + +## Examples +*Give some examples of the mSCAN (hin_mcd3).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (hin_mcd3).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (hin_mcd3) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/hin_mcd3/task.py b/src/genbench/tasks/multilingual_scan/hin_mcd3/task.py new file mode 100644 index 0000000..063a585 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_mcd3/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanHinMcd3(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/hin_simple/__init__.py b/src/genbench/tasks/multilingual_scan/hin_simple/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/hin_simple/config.jsonnet b/src/genbench/tasks/multilingual_scan/hin_simple/config.jsonnet new file mode 100644 index 0000000..1950780 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_simple/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: hin, split: simple)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (hin) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/simple/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/hin/simple/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/hin_simple/doc.md 
b/src/genbench/tasks/multilingual_scan/hin_simple/doc.md new file mode 100644 index 0000000..7d1529b --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_simple/doc.md @@ -0,0 +1,19 @@ +# mSCAN (hin_simple) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (hin_simple).* + +## Examples +*Give some examples of the mSCAN (hin_simple).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (hin_simple).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (hin_simple) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/hin_simple/task.py b/src/genbench/tasks/multilingual_scan/hin_simple/task.py new file mode 100644 index 0000000..1c57a11 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/hin_simple/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanHinSimple(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/__init__.py b/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/config.jsonnet b/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/config.jsonnet new file mode 100644 index 0000000..55cfc72 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: rus, split: add_prim_jump)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (rus) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', 
+ 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/add_prim_jump/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/add_prim_jump/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/doc.md b/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/doc.md new file mode 100644 index 0000000..2b781db --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/doc.md @@ -0,0 +1,19 @@ +# mSCAN (rus_add_prim_jump) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (rus_add_prim_jump).* + +## Examples +*Give some examples of the mSCAN (rus_add_prim_jump).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (rus_add_prim_jump).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (rus_add_prim_jump) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/task.py b/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/task.py new file mode 100644 index 0000000..decfa49 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_add_prim_jump/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanRusAddPrimJump(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/__init__.py b/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/config.jsonnet b/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/config.jsonnet new file mode 100644 index 0000000..400ba0f --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: rus, split: add_prim_turn_left)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (rus) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/add_prim_turn_left/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/add_prim_turn_left/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + 
best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/doc.md b/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/doc.md new file mode 100644 index 0000000..eec63b0 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/doc.md @@ -0,0 +1,19 @@ +# mSCAN (rus_add_prim_turn_left) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (rus_add_prim_turn_left).* + +## Examples +*Give some examples of the mSCAN (rus_add_prim_turn_left).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (rus_add_prim_turn_left).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (rus_add_prim_turn_left) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/task.py b/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/task.py new file mode 100644 index 0000000..3441e5b --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_add_prim_turn_left/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanRusAddPrimTurnLeft(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/rus_length/__init__.py b/src/genbench/tasks/multilingual_scan/rus_length/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/rus_length/config.jsonnet b/src/genbench/tasks/multilingual_scan/rus_length/config.jsonnet new file mode 100644 index 0000000..2aad9aa --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_length/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: rus, split: length)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (rus) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/length/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/length/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git 
a/src/genbench/tasks/multilingual_scan/rus_length/doc.md b/src/genbench/tasks/multilingual_scan/rus_length/doc.md new file mode 100644 index 0000000..68be06c --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_length/doc.md @@ -0,0 +1,19 @@ +# mSCAN (rus_length) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (rus_length).* + +## Examples +*Give some examples of the mSCAN (rus_length).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (rus_length).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (rus_length) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/rus_length/task.py b/src/genbench/tasks/multilingual_scan/rus_length/task.py new file mode 100644 index 0000000..b9d8b4c --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_length/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanRusLength(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd1/__init__.py b/src/genbench/tasks/multilingual_scan/rus_mcd1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd1/config.jsonnet b/src/genbench/tasks/multilingual_scan/rus_mcd1/config.jsonnet new file mode 100644 index 0000000..b491ec5 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_mcd1/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: rus, split: mcd1)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (rus) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie 
Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/mcd1/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/mcd1/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/mcd1/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd1/doc.md b/src/genbench/tasks/multilingual_scan/rus_mcd1/doc.md new file mode 100644 index 0000000..70750f3 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_mcd1/doc.md @@ -0,0 +1,19 @@ +# mSCAN (rus_mcd1) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (rus_mcd1).* + +## Examples +*Give some examples of the mSCAN (rus_mcd1).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (rus_mcd1).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (rus_mcd1) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd1/task.py b/src/genbench/tasks/multilingual_scan/rus_mcd1/task.py new file mode 100644 index 0000000..b4562f4 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_mcd1/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanRusMcd1(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd2/__init__.py b/src/genbench/tasks/multilingual_scan/rus_mcd2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd2/config.jsonnet b/src/genbench/tasks/multilingual_scan/rus_mcd2/config.jsonnet new file mode 100644 index 0000000..5cad3f2 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_mcd2/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: rus, split: mcd2)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (rus) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/mcd2/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/mcd2/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/mcd2/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file 
diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd2/doc.md b/src/genbench/tasks/multilingual_scan/rus_mcd2/doc.md new file mode 100644 index 0000000..07a5546 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_mcd2/doc.md @@ -0,0 +1,19 @@ +# mSCAN (rus_mcd2) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (rus_mcd2).* + +## Examples +*Give some examples of the mSCAN (rus_mcd2).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (rus_mcd2).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (rus_mcd2) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd2/task.py b/src/genbench/tasks/multilingual_scan/rus_mcd2/task.py new file mode 100644 index 0000000..40fb09a --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_mcd2/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanRusMcd2(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd3/__init__.py b/src/genbench/tasks/multilingual_scan/rus_mcd3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd3/config.jsonnet b/src/genbench/tasks/multilingual_scan/rus_mcd3/config.jsonnet new file mode 100644 index 0000000..87595a6 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_mcd3/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: rus, split: mcd3)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (rus) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane 
Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/mcd3/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/mcd3/test.jsonl', + validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/mcd3/dev.jsonl', + }, + + has_validation_set: true, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd3/doc.md b/src/genbench/tasks/multilingual_scan/rus_mcd3/doc.md new file mode 100644 index 0000000..892aad5 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_mcd3/doc.md @@ -0,0 +1,19 @@ +# mSCAN (rus_mcd3) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (rus_mcd3).* + +## Examples +*Give some examples of the mSCAN (rus_mcd3).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (rus_mcd3).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (rus_mcd3) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. 
diff --git a/src/genbench/tasks/multilingual_scan/rus_mcd3/task.py b/src/genbench/tasks/multilingual_scan/rus_mcd3/task.py new file mode 100644 index 0000000..4c7d22c --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_mcd3/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanRusMcd3(Task): + pass diff --git a/src/genbench/tasks/multilingual_scan/rus_simple/__init__.py b/src/genbench/tasks/multilingual_scan/rus_simple/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/genbench/tasks/multilingual_scan/rus_simple/config.jsonnet b/src/genbench/tasks/multilingual_scan/rus_simple/config.jsonnet new file mode 100644 index 0000000..9c08282 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_simple/config.jsonnet @@ -0,0 +1,55 @@ +{ + name: 'mSCAN (language: rus, split: simple)', + + // @TODO: Add a description of the task + description: 'Multilingual SCAN (rus) aims to measure ...', + + // @TODO: Add a list of keywords that describe the task + keywords: [ + 'keyword1', + 'keyword2', + ], + + authors: [ + 'Amélie Reymond', + 'Shane Steinert-Threlkeld', + ], + + data_source: { + type: 'manual', + train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/simple/train.jsonl', + test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/rus/simple/test.jsonl', + + }, + + has_validation_set: false, + has_train_set: true, + + task_type: 'free_form', + + preparation_strategies: { + finetuning: { + objective: 'maximum_likelihood', + }, + + prompt_based_testing: { + prompt_builder: { + instruction_zero_shot: '', + input_prefix: 'IN: ', + output_prefix: 'OUT: ', + append_choices_to_input: false, + few_shot_example_separator: '\n\n', + stop_string: '\n\n', + } + }, + + }, + + evaluation_metrics: [ + { + hf_id: 'exact_match', + git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742", + best_score: 1.0, + } + ] +} \ No newline at end of file diff --git a/src/genbench/tasks/multilingual_scan/rus_simple/doc.md 
b/src/genbench/tasks/multilingual_scan/rus_simple/doc.md new file mode 100644 index 0000000..124640a --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_simple/doc.md @@ -0,0 +1,19 @@ +# mSCAN (rus_simple) + +## Abstract +*Copy the abstract of your accompanying paper for this task here mSCAN (rus_simple).* + +## Examples +*Give some examples of the mSCAN (rus_simple).* + +## Usage +*Describe how to load your task and what is required for evaluation, if anything.* + +## Data Source +*Describe the data source for this mSCAN (rus_simple).* + +## Limitations and Bias +*Note any known limitations or biases that the mSCAN (rus_simple) has, with links and references if possible.* + +## GenBench Eval card +*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*. diff --git a/src/genbench/tasks/multilingual_scan/rus_simple/task.py b/src/genbench/tasks/multilingual_scan/rus_simple/task.py new file mode 100644 index 0000000..d76e6c8 --- /dev/null +++ b/src/genbench/tasks/multilingual_scan/rus_simple/task.py @@ -0,0 +1,5 @@ +from genbench import Task + + +class MscanRusSimple(Task): + pass