Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…ian-embedding-benchmark into add-summarization
  • Loading branch information
KennethEnevoldsen committed Jan 15, 2024
2 parents 04187b3 + 2b0a47a commit 5072584
Show file tree
Hide file tree
Showing 61 changed files with 1,071 additions and 314 deletions.
4 changes: 2 additions & 2 deletions .cookiecutter.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"_copy_without_render": [
"*.github"
],
"_template": "https://github.com/MartinBernstorff/swift-python-cookiecutter",
"_template": "https://github.com/KennethEnevoldsen/swift-python-cookiecutter",
"author": "Kenneth Enevoldsen",
"copyright_year": "2023",
"email": "Kennethcenevoldsen@gmail.com",
Expand All @@ -11,4 +11,4 @@
"package_name": "seb",
"project_name": "scandinavian-embedding-benchmark",
"version": "0.0.0"
}
}
2 changes: 1 addition & 1 deletion .cruft.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template": "https://github.com/KennethEnevoldsen/swift-python-cookiecutter",
"commit": "c5698bd37fe84cdc5f5f411a56a49c2f5db77dab",
"commit": "e02068889310225ea4f65ea0b203c2949e1597a9",
"checkout": null,
"context": {
"cookiecutter": {
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/stalebot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ jobs:
close-pr-message: "This PR was closed automatically. Feel free to re-open it if you still want to work on it."
close-pr-label: "closed-by-stalebot"
operations-per-run: 20
exempt-pr-labels: "dependencies,bot"
exempt-pr-labels: "no-stale"
exempt-issue-labels: "no-stale"
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,8 @@ seb_cache/
site/
datawrapper_api_key.txt
hf_api_key.txt

# Test caches
test_task.json
test_encode_task.json
test-encode-task.json
31 changes: 0 additions & 31 deletions .pre-commit-config.yaml

This file was deleted.

108 changes: 108 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,114 @@



## v0.3.1 (2024-01-15)

### Documentation

* docs: added execute flag ([`ea0e9ca`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/ea0e9ca6329a062c229d6157005f68cca4b98f15))

### Fix

* fix: SebModel -> EmbeddingModel ([`d2f9efa`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/d2f9efa1e0a367f6240aef7f0cad5ba4a9b56b11))

* fix: Allow embedding size to be None when using CLI ([`c621a8b`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/c621a8b328b1f6063cbd29d8a43376a9f587a00e))

### Unknown

* Merge pull request #58 from KennethEnevoldsen/update-cruft

Update cruft ([`870e442`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/870e4424f4448c948a2f14f1d9035276b63e04a0))

* clean: removed pre-commit as it is no longer used ([`f1b5804`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/f1b5804b83796694157f282aacb1280986c58d01))

* updated from cruft template ([`bd7b11a`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/bd7b11a202cca340103163b26e809e9491fea52b))


## v0.3.0 (2024-01-14)

### Ci

* ci: Updated some names in the workflow ([`b7c3012`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/b7c3012d843d5eaa2017b96479f6ac730cd48087))

### Documentation

* docs: Added avg rank to benchmark table ([`15a821e`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/15a821e6ade440fc2a2bba218e0c91d8bbc3c480))

### Feature

* feat: Added option to not run a model ([`55cd023`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/55cd023cd69be59e7444bf19c331ee89b4c5003f))

### Fix

* fix: Added embedding size of models ([`2937099`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/29370992610b78055f27a8c3ad7504147acbff5b))

* fix: Added mistral current scores ([`748d8a9`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/748d8a97d7a9f1d5f186cd2d92468167f912e6ac))

* fix: Added prettier prints when running benchmark ([`012bcd9`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/012bcd9def66cfbfe203e6e7fd346cd09f7e2594))

* fix: Added option to ignore cache ([`8f36080`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/8f360805038ade5b6ba91a9726918b20ac0be6f6))

* fix: removed typer dependency ([`e519917`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/e519917331218e993b9cc040851cb8d5d0097fdc))

* fix: removed duplicate on update benchmark ([`ef6270c`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/ef6270c19ff3086da8e6596f7ffc03fdce6a0038))

* fix: Added cache dir to all entry points ([`3fb4280`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/3fb42806ffd9d0ccdbdcee45171d20a1390a8e68))

### Unknown

* Merge pull request #52 from KennethEnevoldsen/add-dataset

Add embedding size to benchmark ([`d40a633`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/d40a63385db585121cea606b1c0505099855cafa))

* Merge pull request #50 from KennethEnevoldsen/run-using-cache

Add public cache to benchmark ([`67e571c`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/67e571c3d0133dde1192c56ee21cb93afa4e2f3e))

* Merge branch 'main' of https://github.com/KennethEnevoldsen/Scandinavian-Embedding-Benchmark ([`6d9d7c1`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/6d9d7c180208f62532a69e010ac9a6235af20385))

* ignore files ([`a1904ed`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/a1904ed2afeb12c2ecb71a66a2b9946303abbaa5))

* Added make command for table in docs ([`0cb3522`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/0cb352299cd0ffba6c80edf68158d0157cca58fe))

* Merge pull request #51 from KennethEnevoldsen/run_mistral_on_ucloud

Added command for running on ucloud ([`396f79b`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/396f79b79c9461ffb28b2b98ac1d95218a0b65af))

* clean: remove test file from cache ([`89bb78f`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/89bb78faa9902822c7d3c4952d1d0e77aa0ec2ca))

* clean: removed test models from cache ([`10413e1`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/10413e1dda23bcb7741b6e96edcf871086f1e4e0))

* clean: remove tests from cache ([`af1f52d`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/af1f52d540fc6b951e3a2453502b00853420e33b))

* Added test for checking if benchmark is up to date ([`8fa2545`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/8fa254579d78e0dd4b1e7a9e857bc9ee87e458d0))

* Moved cache dir to package ([`94d6468`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/94d6468daf6ce2b33a53aa1485818bf38fafcaf2))

* Added command for running on ucloud ([`c48b32e`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/c48b32ed0b3863c10db06cd96f05bf098eb2a47a))

* Merge pull request #45 from KennethEnevoldsen/updated_norwegian_parl

Updated desc. for norwegian parl. ([`985dd5d`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/985dd5db69819a7c865d5094bd63df8e908a7c3d))

* Updated desc. for norwegian parl. ([`c7f1e74`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/c7f1e746dfa5596d63666fa994b1a951f3aaa573))

* Merge pull request #43 from KennethEnevoldsen/add-mistral

Added mistral dependencies ([`a4decba`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/a4decbae1d6c9f3bf84f0e665449d514cd365779))

* Added mistral dependencies ([`1d39008`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/1d39008c744139b788fd43b6426fc517e0355d12))

* Merge pull request #40 from x-tabdeveloping/main

Added E5 Mistral ([`c317ad8`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/c317ad824e3016bac8cb860f2e8feedfe71bfc65))

* Added E5 Mistral ([`01cfb90`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/01cfb908e592efa5068ff9715dc18e14c767d05f))

* Added open-source flag to danskbertz ([`8ef4e39`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/8ef4e39adf9e022ec046a80896f9510c0ec43390))

* Update index.md ([`41e350e`](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/commit/41e350ef7c3951a3f902188c30e91ecd35ed78eb))


## v0.2.10 (2023-12-07)

### Ci
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Here is a list of important resources for contributors:

[MIT license]: https://opensource.org/licenses/MIT
[source code]: https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark
[documentation]: https://scandinavian-embedding-benchmark.readthedocs.io/
[documentation]: https://KennethEnevoldsen.github.io/scandinavian-embedding-benchmark/index.html
[issue tracker]: https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/issues

## How to report a bug
Expand Down
41 changes: 41 additions & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<!-- This file is auto-generated -->

# Command Line Interface

Documentation for the command line interface of SEB.

## CLI

### `run`

Runs the Benchmark on a specified model.

**Examples:**

To run a model on all languages and tasks

```{bash}
seb run sentence-transformers/all-MiniLM-L6-v2 -o results.json
```

If you want to limit the run to a subset of languages or tasks, you can use the `--languages` and `--tasks` flags.
```{bash}
# Running a model on a subset of languages
seb run sentence-transformers/all-MiniLM-L6-v2 -o results.json -l nb nn
# Running a model on a subset of tasks
seb run sentence-transformers/all-MiniLM-L6-v2 -o results.json -t DKHate ScaLA
```

| Argument | Type | Description | Default |
| --------------------- | ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | -------- |
| `model_name` | `str` | The model name or path. If the model is not registered in SEB it will be loaded using SentenceTransformers. | |
| `--output-path`, `-o` | `Path` | The path to save the output to. Can be a directory. | |
| `--languages`, `-l` | `Optional[list[str], NoneType]` | Which subset of languages to run the benchmark on. If left blank it will run it on all languages. | `None` |
| `--tasks`, `-t` | `Optional[list[str], NoneType]` | What tasks the model should be run on. Defaults to all tasks within the specified languages. | `None` |
| `--ignore-cache` | `bool` | Ignores cached models. Note that SEB ships with an existing cache. You can set the cache_dir using the environment variable SEB_CACHE_DIR | `False` |
| `--ignore-errors` | `bool` | Should errors be ignored when running a model on a benchmark task. | `False` |
| `--code`, `-c` | `Path` | Code to run before executing benchmark. Useful for adding custom model to registries. | `None` |
| `--logging-level` | `str` | Logging level for the benchmark. | `'INFO'` |
15 changes: 15 additions & 0 deletions docs/create_cli_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# This just creates a rough draft of the CLI documentation. It is not
# intended to be used for anything other than a starting point.
# At least, we would need this issue fixed first:
# https://github.com/explosion/radicli/issues/30

from pathlib import Path

from seb.cli import cli

title = "Command Line Interface"
description = "Documentation for the command line interface of SEB."

if __name__ == "__main__":
with Path("docs/cli.md").open("w", encoding="utf8") as f:
f.write(cli.document(title=title, description=description))
20 changes: 20 additions & 0 deletions docs/paper.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Introduction
- importance of embedding models (search, RAG)
- few benchmarks for Scandinavian languages


## Contributions:
- Creates a benchmark for Scandinavian languages
- integrates with MTEB
- with broad coverage of both domains and use-cases
- Allow for custom encoding methods dependent on task (as opposed to mteb)
- Added a series of new datasets (?)
- easily extendable

## (Design principles)
- flexible (easy to add new models)
- easy to run on even small laptops
- minimal dependencies besides MTEB
- It should be transparent how models are run, as the exact prompt used can often notably influence performance. Therefore, models are implemented as a part of the benchmark.

# Results
2 changes: 1 addition & 1 deletion docs/run_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def benchmark_result_to_row(
scores = [get_main_score(t, langs) for t in sorted_tasks] # type: ignore

df = pd.DataFrame([scores], columns=task_names, index=[mdl_name])
df["Average Score"] = np.mean(scores) # type: ignore
df["Average Score"] = result.get_mean_score() # type: ignore
df["Open Source"] = open_source_to_string(result.meta.open_source)
df["Embedding Size"] = result.meta.embedding_size
return df
Expand Down
3 changes: 2 additions & 1 deletion makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ update-from-template:
@echo "This will update the project from the template, make sure to resolve any .rej files"
cruft update --skip-apply-ask

update-benchmark:
run-benchmark:
# set environment variables
hf_api_key=$(cat hf_api_key.txt)
export HF_TOKEN=hf_api_key
Expand All @@ -55,3 +55,4 @@ check-benchmark-is-up-to-date:
@echo "--- 🔄 Checking benchmark is up to date ---"

python src/scripts/check_benchmark_is_up_to_date.py

3 changes: 3 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,12 @@ nav:
- Package:
- Installation: installation.md
- Getting Started: getting_started.ipynb
- CLI: cli.md
- API: api.md
plugins:
- mkdocs-jupyter
# - mkdocs-jupyter:
# - execute: true # should be true when mistral results is merged in
- search
- mkdocstrings:
handlers:
Expand Down
14 changes: 12 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "seb"
version = "0.2.10"
version = "0.3.1"
authors = [
{ name = "Kenneth Enevoldsen", email = "Kennethcenevoldsen@gmail.com" },
]
Expand All @@ -20,13 +20,14 @@ classifiers = [
requires-python = ">=3.9"

dependencies = [
"tabulate>=0.9.0",
"mteb[beir]==1.1.1",
"pydantic>=2.1.0",
"catalogue>=2.0.8",
"radicli>=0.0.25",

]


[project.license]
file = "LICENSE"
name = "MIT"
Expand Down Expand Up @@ -63,6 +64,10 @@ homepage = "https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmar
repository = "https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark"
documentation = "https://KennethEnevoldsen.github.io/scandinavian-embedding-benchmark/"


[project.scripts]
seb = "seb.cli.cli:setup_cli"

[tool.pyright]
exclude = [".*venv*"]
pythonPlatform = "Darwin"
Expand Down Expand Up @@ -100,6 +105,7 @@ select = [
]
ignore = [
"ANN101",
"ANN102",
"ANN401",
"E402",
"E501",
Expand Down Expand Up @@ -140,6 +146,10 @@ exclude = [
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
target-version = "py39"

[tool.ruff.format]
docstring-code-line-length = 88


[tool.ruff.flake8-annotations]
mypy-init-return = true
suppress-none-returning = true
Expand Down
2 changes: 1 addition & 1 deletion src/seb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .benchmark import Benchmark
from .full_benchmark import run_benchmark
from .model_interface import ModelInterface, ModelMeta, SebModel
from .model_interface import ModelInterface, ModelMeta, EmbeddingModel
from .registries import (
get_all_models,
get_all_tasks,
Expand Down
6 changes: 3 additions & 3 deletions src/seb/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from seb.cli import main

if __name__ == "__main__":
main()
from seb.cli import setup_cli

setup_cli()
Loading

0 comments on commit 5072584

Please sign in to comment.