GenBench · ameliereymond · Aug 1, 2023 · Aug 1, 2023 · Aug 1, 2023 · Aug 1, 2023
diff --git a/scripts/mscan/create-mscan-jsonnets.py b/scripts/mscan/create-mscan-jsonnets.py
@@ -0,0 +1,90 @@
+
+def get_jsonnet(language: str, split: str, has_dev: bool) -> str:
+    """Render the jsonnet config text for one mSCAN subtask.
+
+    Args:
+        language: Language code; interpolated into the task name, the
+            description and the dataset URLs.
+        split: Split name; interpolated into the task name and the dataset URLs.
+        has_dev: Whether to emit a ``validation`` data source (``dev.jsonl``)
+            and set ``has_validation_set`` to ``true``.
+
+    Returns:
+        The complete config as a string. When ``has_dev`` is false the
+        ``validation`` line is rendered as an empty string, which leaves a
+        whitespace-only line inside ``data_source``.
+    """
+    # The template is an f-string, so literal jsonnet braces are doubled and
+    # the backslashes of the separator/stop strings are escaped.
+    return f"""{{
+    name: 'mSCAN (language: {language}, split: {split})',
+
+    // @TODO: Add a description of the task
+    description: 'Multilingual SCAN ({language}) aims to measure ...',
+
+    // @TODO: Add a list of keywords that describe the task
+    keywords: [
+        'keyword1',
+        'keyword2',
+    ],
+
+    authors: [
+        'Amélie Reymond',
+        'Shane Steinert-Threlkeld',
+    ],
+
+    data_source: {{
+        type: 'manual',
+        train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/{language}/{split}/train.jsonl',
+        test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/{language}/{split}/test.jsonl',
+        {f"validation: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/{language}/{split}/dev.jsonl'," if has_dev else ""}
+    }},
+
+    has_validation_set: {"true" if has_dev else "false"},
+    has_train_set: true,
+
+    task_type: 'free_form',
+
+    preparation_strategies: {{
+        finetuning: {{
+            objective: 'maximum_likelihood',
+        }},
+
+        prompt_based_testing: {{
+            prompt_builder: {{
+                instruction_zero_shot: '',
+                input_prefix: 'IN: ',
+                output_prefix: 'OUT: ',
+                append_choices_to_input: false,
+                few_shot_example_separator: '\\n\\n',
+                stop_string: '\\n\\n',
+            }}
+        }},
+
+    }},
+
+    evaluation_metrics: [
+        {{
+            hf_id: 'exact_match',
+            git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742",
+            best_score: 1.0,
+        }}
+    ]
+}}"""
+
+
+languages = ["cmn", "eng", "fra", "hin", "rus"]
+splits = {
+    "add_prim_jump": {
+        "has_dev": False
+    },
+    "add_prim_turn_left": {
+        "has_dev": False
+    },
+    "length": {
+        "has_dev": False
+    },
+    "mcd1": {
+        "has_dev": True
+    },
+    "mcd2": {
+        "has_dev": True
+    },
+    "mcd3": {
+        "has_dev": True
+    },
+    "simple": {
+        "has_dev": False
+    },
+}
+
+for language in languages:
+    for split_name, split_args in splits.items():
+        jsonnet = get_jsonnet(language, split_name, split_args["has_dev"])
+        with open(f"src/genbench/tasks/mscan/{language}_{split_name}/config.jsonnet", "w") as f:
+            f.write(jsonnet)
+
diff --git a/scripts/mscan/create-mscan.sh b/scripts/mscan/create-mscan.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+genbench-cli create-task --id "mscan" --name "mSCAN" \
+    -s cmn_add_prim_jump \
+    -s cmn_add_prim_turn_left \
+    -s cmn_length \
+    -s cmn_mcd1 \
+    -s cmn_mcd2 \
+    -s cmn_mcd3 \
+    -s cmn_simple \
+    -s eng_add_prim_jump \
+    -s eng_add_prim_turn_left \
+    -s eng_length \
+    -s eng_mcd1 \
+    -s eng_mcd2 \
+    -s eng_mcd3 \
+    -s eng_simple \
+    -s fra_add_prim_jump \
+    -s fra_add_prim_turn_left \
+    -s fra_length \
+    -s fra_mcd1 \
+    -s fra_mcd2 \
+    -s fra_mcd3 \
+    -s fra_simple \
+    -s hin_add_prim_jump \
+    -s hin_add_prim_turn_left \
+    -s hin_length \
+    -s hin_mcd1 \
+    -s hin_mcd2 \
+    -s hin_mcd3 \
+    -s hin_simple \
+    -s rus_add_prim_jump \
+    -s rus_add_prim_turn_left \
+    -s rus_length \
+    -s rus_mcd1 \
+    -s rus_mcd2 \
+    -s rus_mcd3 \
+    -s rus_simple
diff --git a/src/genbench/tasks/multilingual_scan/GenBench_eval_card.png b/src/genbench/tasks/multilingual_scan/GenBench_eval_card.png
diff --git a/src/genbench/tasks/multilingual_scan/__init__.py b/src/genbench/tasks/multilingual_scan/__init__.py
@@ -0,0 +1,5 @@
+from genbench import TaskDict
+
+
+class Mscan(TaskDict):
+    pass
diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/__init__.py b/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/__init__.py
diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/config.jsonnet b/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/config.jsonnet
@@ -0,0 +1,55 @@
+{
+    name: 'mSCAN (language: cmn, split: add_prim_jump)',
+
+    // @TODO: Add a description of the task
+    description: 'Multilingual SCAN (cmn) aims to measure ...',
+
+    // @TODO: Add a list of keywords that describe the task
+    keywords: [
+        'keyword1',
+        'keyword2',
+    ],
+
+    authors: [
+        'Amélie Reymond',
+        'Shane Steinert-Threlkeld',
+    ],
+
+    data_source: {
+        type: 'manual',
+        train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/add_prim_jump/train.jsonl',
+        test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/add_prim_jump/test.jsonl',
+
+    },
+
+    has_validation_set: false,
+    has_train_set: true,
+
+    task_type: 'free_form',
+
+    preparation_strategies: {
+        finetuning: {
+            objective: 'maximum_likelihood',
+        },
+
+        prompt_based_testing: {
+            prompt_builder: {
+                instruction_zero_shot: '',
+                input_prefix: 'IN: ',
+                output_prefix: 'OUT: ',
+                append_choices_to_input: false,
+                few_shot_example_separator: '\n\n',
+                stop_string: '\n\n',
+            }
+        },
+
+    },
+
+    evaluation_metrics: [
+        {
+            hf_id: 'exact_match',
+            git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742",
+            best_score: 1.0,
+        }
+    ]
+}
diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/doc.md b/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/doc.md
@@ -0,0 +1,19 @@
+# mSCAN (cmn_add_prim_jump)
+
+## Abstract
+*Copy the abstract of the paper accompanying the mSCAN (cmn_add_prim_jump) task here.*
+
+## Examples
+*Give some examples from the mSCAN (cmn_add_prim_jump) task.*
+
+## Usage
+*Describe how to load your task and what is required for evaluation, if anything.*
+
+## Data Source
+*Describe the data source for the mSCAN (cmn_add_prim_jump) task.*
+
+## Limitations and Bias
+*Note any known limitations or biases that the mSCAN (cmn_add_prim_jump) has, with links and references if possible.*
+
+## GenBench Eval card
+*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*.
diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/task.py b/src/genbench/tasks/multilingual_scan/cmn_add_prim_jump/task.py
@@ -0,0 +1,5 @@
+from genbench import Task
+
+
+class MscanCmnAddPrimJump(Task):
+    pass
diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/__init__.py b/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/__init__.py
diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/config.jsonnet b/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/config.jsonnet
@@ -0,0 +1,55 @@
+{
+    name: 'mSCAN (language: cmn, split: add_prim_turn_left)',
+
+    // @TODO: Add a description of the task
+    description: 'Multilingual SCAN (cmn) aims to measure ...',
+
+    // @TODO: Add a list of keywords that describe the task
+    keywords: [
+        'keyword1',
+        'keyword2',
+    ],
+
+    authors: [
+        'Amélie Reymond',
+        'Shane Steinert-Threlkeld',
+    ],
+
+    data_source: {
+        type: 'manual',
+        train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/add_prim_turn_left/train.jsonl',
+        test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/add_prim_turn_left/test.jsonl',
+
+    },
+
+    has_validation_set: false,
+    has_train_set: true,
+
+    task_type: 'free_form',
+
+    preparation_strategies: {
+        finetuning: {
+            objective: 'maximum_likelihood',
+        },
+
+        prompt_based_testing: {
+            prompt_builder: {
+                instruction_zero_shot: '',
+                input_prefix: 'IN: ',
+                output_prefix: 'OUT: ',
+                append_choices_to_input: false,
+                few_shot_example_separator: '\n\n',
+                stop_string: '\n\n',
+            }
+        },
+
+    },
+
+    evaluation_metrics: [
+        {
+            hf_id: 'exact_match',
+            git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742",
+            best_score: 1.0,
+        }
+    ]
+}
diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/doc.md b/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/doc.md
@@ -0,0 +1,19 @@
+# mSCAN (cmn_add_prim_turn_left)
+
+## Abstract
+*Copy the abstract of the paper accompanying the mSCAN (cmn_add_prim_turn_left) task here.*
+
+## Examples
+*Give some examples from the mSCAN (cmn_add_prim_turn_left) task.*
+
+## Usage
+*Describe how to load your task and what is required for evaluation, if anything.*
+
+## Data Source
+*Describe the data source for the mSCAN (cmn_add_prim_turn_left) task.*
+
+## Limitations and Bias
+*Note any known limitations or biases that the mSCAN (cmn_add_prim_turn_left) has, with links and references if possible.*
+
+## GenBench Eval card
+*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*.
diff --git a/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/task.py b/src/genbench/tasks/multilingual_scan/cmn_add_prim_turn_left/task.py
@@ -0,0 +1,5 @@
+from genbench import Task
+
+
+class MscanCmnAddPrimTurnLeft(Task):
+    pass
diff --git a/src/genbench/tasks/multilingual_scan/cmn_length/__init__.py b/src/genbench/tasks/multilingual_scan/cmn_length/__init__.py
diff --git a/src/genbench/tasks/multilingual_scan/cmn_length/config.jsonnet b/src/genbench/tasks/multilingual_scan/cmn_length/config.jsonnet
@@ -0,0 +1,55 @@
+{
+    name: 'mSCAN (language: cmn, split: length)',
+
+    // @TODO: Add a description of the task
+    description: 'Multilingual SCAN (cmn) aims to measure ...',
+
+    // @TODO: Add a list of keywords that describe the task
+    keywords: [
+        'keyword1',
+        'keyword2',
+    ],
+
+    authors: [
+        'Amélie Reymond',
+        'Shane Steinert-Threlkeld',
+    ],
+
+    data_source: {
+        type: 'manual',
+        train: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/length/train.jsonl',
+        test: 'https://huggingface.co/datasets/ameliettr/mSCAN/raw/main/cmn/length/test.jsonl',
+
+    },
+
+    has_validation_set: false,
+    has_train_set: true,
+
+    task_type: 'free_form',
+
+    preparation_strategies: {
+        finetuning: {
+            objective: 'maximum_likelihood',
+        },
+
+        prompt_based_testing: {
+            prompt_builder: {
+                instruction_zero_shot: '',
+                input_prefix: 'IN: ',
+                output_prefix: 'OUT: ',
+                append_choices_to_input: false,
+                few_shot_example_separator: '\n\n',
+                stop_string: '\n\n',
+            }
+        },
+
+    },
+
+    evaluation_metrics: [
+        {
+            hf_id: 'exact_match',
+            git_commit_sha: "758135da6a37ce962b7bc38c6dd5eab672d2b742",
+            best_score: 1.0,
+        }
+    ]
+}
diff --git a/src/genbench/tasks/multilingual_scan/cmn_length/doc.md b/src/genbench/tasks/multilingual_scan/cmn_length/doc.md
@@ -0,0 +1,19 @@
+# mSCAN (cmn_length)
+
+## Abstract
+*Copy the abstract of the paper accompanying the mSCAN (cmn_length) task here.*
+
+## Examples
+*Give some examples from the mSCAN (cmn_length) task.*
+
+## Usage
+*Describe how to load your task and what is required for evaluation, if anything.*
+
+## Data Source
+*Describe the data source for the mSCAN (cmn_length) task.*
+
+## Limitations and Bias
+*Note any known limitations or biases that the mSCAN (cmn_length) has, with links and references if possible.*
+
+## GenBench Eval card
+*Describe what kind of generalisation your task is evaluating, and include a [genbench eval card](https://genbench.org/eval_cards/) for your task*.
diff --git a/src/genbench/tasks/multilingual_scan/cmn_length/task.py b/src/genbench/tasks/multilingual_scan/cmn_length/task.py
@@ -0,0 +1,5 @@
+from genbench import Task
+
+
+class MscanCmnLength(Task):
+    pass
diff --git a/src/genbench/tasks/multilingual_scan/cmn_mcd1/__init__.py b/src/genbench/tasks/multilingual_scan/cmn_mcd1/__init__.py