From 8a883c4255864c9ba8d567b8e3fa5e42236642e0 Mon Sep 17 00:00:00 2001
From: Lasse Hansen
Date: Mon, 31 Jul 2023 10:29:17 +0200
Subject: [PATCH 1/2] docs: minor updates to index page

---
 docs/index.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/index.md b/docs/index.md
index f5394edc..b461e66e 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,13 +6,13 @@ hide:
 
 # Scandinavian Embedding Benchmark
 
-This is the documentation for the Scandinavian Embedding Benchmark. This benchmark is intended to evaluate the sentence/documents embeddings of large language models.
+This is the documentation for the Scandinavian Embedding Benchmark. This benchmark is intended to evaluate the sentence/document embeddings of large language models.
 
 Intended uses for this benchmark:
 
 - Evaluating document embeddings of Scandinavian language models
 - Evaluating document embeddings for multilingual models on Scandinavian languages
-- Allow ranking of competing Scandinavian and multilingual models using no more compute that what a consumer laptop can provide
+- Allow ranking of competing Scandinavian and multilingual models using no more compute than what a consumer laptop can provide
 
 === "All"
 
@@ -34,9 +34,9 @@ Intended uses for this benchmark:
 
 ## Comparison to other benchmarks
 
-If you use this benchmark for a relative ranking of language models you should also take a look at [ScandEval](https://scandeval.github.io), which as opposed the this benchmark fully fine-tunes the models. It also includes structured predictions tasks such as named entity recognition. Many of the tasks in this embeddings benchmark is also included in ScandEval. A notable difference between the ScandEval and this benchmark is that it does not include machine translated tasks.
+If you use this benchmark for a relative ranking of language models you should also look at [ScandEval](https://scandeval.github.io), which, as opposed to this benchmark, fully fine-tunes the models. It also includes structured prediction tasks such as named entity recognition. Many of the tasks in this embedding benchmark are also included in ScandEval. A notable difference between ScandEval and this benchmark is that this one does not include machine-translated tasks.
 
-The tasks within this benchmark is also included in the [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard, though the aggregations methods very slightly. The MTEB is primarily an English embedding benchmark, with a few multilingual tasks along with a few additional languages. As a part of this project the tasks was also added to the MTEB leaderboard.
+The tasks within this benchmark are also included in the [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard, though the aggregation methods vary slightly. MTEB is primarily an English embedding benchmark, with a few multilingual tasks and additional languages. As a part of this project, the tasks were also added to the MTEB leaderboard.
 
From 6e4ae7afb9829984d62798d2e5657b0d88c93fb8 Mon Sep 17 00:00:00 2001
From: Lasse Hansen
Date: Mon, 31 Jul 2023 10:37:55 +0200
Subject: [PATCH 2/2] feat: add multilingual sentence transformer

---
 src/seb/seb_models.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/seb/seb_models.py b/src/seb/seb_models.py
index c6dab544..66de289e 100644
--- a/src/seb/seb_models.py
+++ b/src/seb/seb_models.py
@@ -41,6 +41,19 @@ def create_all_mini_lm_l6_v2() -> SebModel:
         meta=meta,
     )
 
+@models.register("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+def create_multilingual_mini_lm_l12_v2() -> SebModel:
+    hf_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+    meta = ModelMeta(
+        name=hf_name.split("/")[-1],
+        huggingface_name=hf_name,
+        reference=f"https://huggingface.co/{hf_name}",
+        languages=[],
+    )
+    return SebModel(
+        loader=partial(get_sentence_transformer, model_name=hf_name),  # type: ignore
+        meta=meta,
+    )
 
 @models.register("KBLab/sentence-bert-swedish-cased")
 def create_sentence_swedish_cased() -> SebModel:
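Note (not part of the patch): a minimal usage sketch of what the loader registered above ultimately wraps, assuming the sentence-transformers package is installed. Only the model name is taken from the patch; the rest is illustrative and does not go through seb's registry.

# Minimal sketch, assuming sentence-transformers is installed; illustrative only.
from sentence_transformers import SentenceTransformer

# Same model name as registered in the patch above.
model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

# Encode a few Scandinavian sentences into dense vectors.
sentences = [
    "Dette er en dansk sætning.",
    "Detta är en svensk mening.",
    "Dette er en norsk setning.",
]
embeddings = model.encode(sentences)
print(embeddings.shape)  # (3, 384) for this MiniLM-L12 model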