Updated desc. for norwegian parl.

KennethEnevoldsen · Jan 9, 2024 · c7f1e74 · c7f1e74
1 parent c317ad8
commit c7f1e74
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 4 deletions.
diff --git a/docs/datasets.md b/docs/datasets.md
@@ -12,8 +12,8 @@ The following tables contains description of all the dataset in the benchmark al
 | [Language Identification](https://aclanthology.org/2021.vardial-1.8/)                                                                                    | A dataset for Nordic language identification.                                                                                                                                                              | Accuracy     | da, sv, nb, nn, is, fo | Classification | wiki                                                     |                  3000 | 78.23 (std: 48.54)                      |
 | [Massive Intent](https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.)   | MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages                                                                                          | Accuracy     | da, nb, sv             | Classification | spoken                                                   |                 15021 | 34.65 (std: 16.99)                      |
 | [Massive Scenario](https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.) | MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages                                                                                          | Accuracy     | da, nb, sv             | Classification | spoken                                                   |                 15021 | 34.65 (std: 16.99)                      |
-| [NoReC](https://aclanthology.org/L18-1661/)                                                                                                              | A Norwegian dataset for sentiment classification on reviews                                                                                                                                                | Accuracy     | nb                     | Classification | reviews                                                  |                  2048 | 89.62 (std: 61.21)                      |
-| [Norwegian parliament](https://huggingface.co/datasets/NbAiLab/norwegian_parliament)                                                                     | Norwegian parliament speeches annotated for sentiment                                                                                                                                                      | Accuracy     | nb                     | Classification | spoken                                                   |                  2400 | 1897.51 (std: 1988.62)                  |
+| [NoReC](https://aclanthology.org/L18-1661/)                                                                                                              | A Norwegian dataset for sentiment classification on review                                                                                                                                                 | Accuracy     | nb                     | Classification | reviews                                                  |                  2048 | 89.62 (std: 61.21)                      |
+| [Norwegian parliament](https://huggingface.co/datasets/NbAiLab/norwegian_parliament)                                                                     | Norwegian parliament speeches annotated with the party of the speaker (`Sosialistisk Venstreparti` vs `Fremskrittspartiet`)                                                                                | Accuracy     | nb                     | Classification | spoken                                                   |                  2400 | 1897.51 (std: 1988.62)                  |
 | [ScaLA](https://aclanthology.org/2023.nodalida-1.20/)                                                                                                    | A linguistic acceptability task for Danish, Norwegian Bokmål Norwegian Nynorsk and Swedish.                                                                                                                | Accuracy     | da, nb, sv, nn         | Classification | fiction, news, non-fiction, spoken, blog                 |                 74846 | 102.50 (std: 56.10)                     |
 | [SweFAQ](https://spraakbanken.gu.se/en/resources/superlim)                                                                                               | A Swedish QA dataset derived from FAQ                                                                                                                                                                      | Ndcg_at_10   | sv                     | Retrieval      | non-fiction, web                                         |                  1539 | 236.21 (std: 225.72)                    |
 | [SweReC](https://aclanthology.org/2023.nodalida-1.20/)                                                                                                   | A Swedish dataset for sentiment classification on review                                                                                                                                                   | Accuracy     | sv                     | Classification | reviews                                                  |                  2048 | 318.83 (std: 499.57)                    |

diff --git a/src/seb/mteb_tasks.py b/src/seb/mteb_tasks.py
@@ -1,6 +1,7 @@
 from typing import Any
 
 import datasets
+from mteb.abstasks import AbsTaskClassification
 from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval
 
 
@@ -77,3 +78,24 @@ def dataset_transform(self) -> None:
                 self.relevant_docs[split][str(q_n)] = {
                     str(cor_n): 1,
                 }  # only one correct match
+
+
+class NorwegianParliamentClassification(AbsTaskClassification):
+    # This overrides only the task description; otherwise it is identical to the NorwegianParliament task in the MTEB benchmark.
+    # Once we have collected a few MTEB-style tasks that are not in the MTEB benchmark, we can add them back to the benchmark.
+    @property
+    def description(self) -> dict[str, Any]:
+        return {
+            "name": "NorwegianParliament",
+            "hf_hub_name": "NbAiLab/norwegian_parliament",
+            "description": "Norwegian parliament speeches annotated with the party of the speaker (`Sosialistisk Venstreparti` vs `Fremskrittspartiet`)",
+            "reference": "https://huggingface.co/datasets/NbAiLab/norwegian_parliament",
+            "type": "Classification",
+            "category": "s2s",
+            "eval_splits": ["test", "validation"],
+            "eval_langs": ["nb"],  # assumed to be bokmål
+            "main_score": "accuracy",
+            "n_experiments": 10,
+            "samples_per_label": 16,
+            "revision": "f7393532774c66312378d30b197610b43d751972",
+        }
diff --git a/src/seb/seb_tasks/norwegian.py b/src/seb/seb_tasks/norwegian.py
@@ -1,3 +1,4 @@
+from seb.mteb_tasks import NorwegianParliamentClassification
 from seb.registries import tasks
 from seb.tasks_interface import MTEBTask, Task
 
@@ -14,8 +15,6 @@ def create_norec() -> Task:
 
 @tasks.register("Norwegian parliament")
 def create_norwegian_parliament() -> Task:
-    from mteb import NorwegianParliamentClassification
-
     task = MTEBTask(NorwegianParliamentClassification())
     task.name = "Norwegian parliament"
     task.domain = ["spoken"]