From 64bace65f840ffb8c11cc9ba689b4509d4e18e26 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Thu, 18 Jan 2024 13:34:41 +0100 Subject: [PATCH] fix: restructure repo --- src/seb/__init__.py | 9 +- src/seb/benchmark.py | 4 +- src/seb/full_benchmark.py | 3 +- .../model.py} | 20 ++- .../mteb_task.py} | 85 +++------- src/seb/interfaces/task.py | 151 ++++++++++++++++++ .../__init__.py | 0 .../cohere_models.py | 10 +- .../e5_mistral.py | 8 +- .../e5_models.py | 7 +- .../fairseq_models.py | 4 +- .../hf_models.py | 2 +- .../openai_models.py | 4 +- .../__init__.py | 0 .../{seb_tasks => registered_tasks}/danish.py | 4 +- src/seb/{ => registered_tasks}/mteb_tasks.py | 25 +-- .../multilingual.py | 7 +- .../norwegian.py | 5 +- .../swedish.py | 7 +- src/seb/registries.py | 4 +- src/seb/result_dataclasses.py | 2 +- src/seb/types.py | 35 ++++ tests/cli/benchmark_cli_code_inject.py | 4 +- tests/cli/test_cli.py | 2 +- tests/dummy_model.py | 6 +- tests/dummy_task.py | 68 ++++---- tests/test_task_dependent_encode.py | 86 ++++++++++ tests/test_tasks.py | 2 +- 28 files changed, 407 insertions(+), 157 deletions(-) rename src/seb/{model_interface.py => interfaces/model.py} (84%) rename src/seb/{tasks_interface.py => interfaces/mteb_task.py} (59%) create mode 100644 src/seb/interfaces/task.py rename src/seb/{seb_models => registered_models}/__init__.py (100%) rename src/seb/{seb_models => registered_models}/cohere_models.py (88%) rename src/seb/{seb_models => registered_models}/e5_mistral.py (96%) rename src/seb/{seb_models => registered_models}/e5_models.py (96%) rename src/seb/{seb_models => registered_models}/fairseq_models.py (97%) rename src/seb/{seb_models => registered_models}/hf_models.py (99%) rename src/seb/{seb_models => registered_models}/openai_models.py (96%) rename src/seb/{seb_tasks => registered_tasks}/__init__.py (100%) rename src/seb/{seb_tasks => registered_tasks}/danish.py (95%) rename src/seb/{ => registered_tasks}/mteb_tasks.py (91%) rename src/seb/{seb_tasks => registered_tasks}/multilingual.py (95%) rename src/seb/{seb_tasks => registered_tasks}/norwegian.py (74%) rename src/seb/{seb_tasks => registered_tasks}/swedish.py (82%) create mode 100644 src/seb/types.py create mode 100644 tests/test_task_dependent_encode.py diff --git a/src/seb/__init__.py b/src/seb/__init__.py index 2963ee30..ad145c04 100644 --- a/src/seb/__init__.py +++ b/src/seb/__init__.py @@ -1,6 +1,5 @@ from .benchmark import Benchmark from .full_benchmark import run_benchmark -from .model_interface import ModelInterface, ModelMeta, EmbeddingModel from .registries import ( get_all_models, get_all_tasks, @@ -9,7 +8,9 @@ models, tasks, ) + +from .interfaces.task import Task +from .interfaces.model import EmbeddingModel, ModelMeta, Encoder from .result_dataclasses import BenchmarkResults, TaskError, TaskResult -from .seb_models import * # import all SEB models -from .seb_tasks import * # import all SEB tasks -from .tasks_interface import Task +from .registered_models import * # import all SEB models +from .registered_tasks import * # import all SEB tasks diff --git a/src/seb/benchmark.py b/src/seb/benchmark.py index cd55f03d..fcf3fbff 100644 --- a/src/seb/benchmark.py +++ b/src/seb/benchmark.py @@ -7,10 +7,10 @@ from tqdm import tqdm -from .model_interface import EmbeddingModel +from .interfaces.model import EmbeddingModel +from .interfaces.task import Task from .registries import get_all_tasks, get_task from .result_dataclasses import BenchmarkResults, TaskError, TaskResult -from .tasks_interface import Task from .warning_ignore_manager import WarningIgnoreContextManager logger = logging.getLogger(__name__) diff --git a/src/seb/full_benchmark.py b/src/seb/full_benchmark.py index 10ea7beb..e2742d3a 100644 --- a/src/seb/full_benchmark.py +++ b/src/seb/full_benchmark.py @@ -6,9 +6,8 @@ from pathlib import Path from typing import Optional -from seb.model_interface import EmbeddingModel - from .benchmark import Benchmark +from .interfaces.model import EmbeddingModel from .registries import get_all_models from .result_dataclasses import BenchmarkResults diff --git a/src/seb/model_interface.py b/src/seb/interfaces/model.py similarity index 84% rename from src/seb/model_interface.py rename to src/seb/interfaces/model.py index 4d7317f6..ddd98ed8 100644 --- a/src/seb/model_interface.py +++ b/src/seb/interfaces/model.py @@ -1,14 +1,15 @@ -from typing import Any, Callable, Optional, Protocol, Union, runtime_checkable +from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol, runtime_checkable -from numpy import ndarray from pydantic import BaseModel -from torch import Tensor -ArrayLike = Union[ndarray, Tensor] +from ..types import ArrayLike + +if TYPE_CHECKING: + from .task import Task @runtime_checkable -class ModelInterface(Protocol): +class Encoder(Protocol): """ Interface which all models must implement. """ @@ -16,12 +17,15 @@ class ModelInterface(Protocol): def encode( self, sentences: list[str], + task: "Task", batch_size: int = 32, **kwargs: dict, ) -> ArrayLike: """Returns a list of embeddings for the given sentences. Args: sentences: List of sentences to encode + task: The task to encode for. This allows the model to encode differently for different tasks. Will always be given but does not need + to be used. batch_size: Batch size for the encoding kwargs: arguments to pass to the models encode method @@ -62,11 +66,11 @@ class EmbeddingModel(BaseModel): """ meta: ModelMeta - loader: Callable[[], ModelInterface] - _model: Optional[ModelInterface] = None + loader: Callable[[], Encoder] + _model: Optional[Encoder] = None @property - def model(self) -> ModelInterface: + def model(self) -> Encoder: """ Dynimically load the model. """ diff --git a/src/seb/tasks_interface.py b/src/seb/interfaces/mteb_task.py similarity index 59% rename from src/seb/tasks_interface.py rename to src/seb/interfaces/mteb_task.py index aeb17495..dad0cb61 100644 --- a/src/seb/tasks_interface.py +++ b/src/seb/interfaces/mteb_task.py @@ -1,60 +1,24 @@ from datetime import datetime -from typing import Any, Protocol, runtime_checkable +from typing import Any import numpy as np from datasets import DatasetDict, concatenate_datasets from mteb import AbsTask from mteb import __version__ as mteb_version -from .model_interface import ModelInterface -from .result_dataclasses import TaskResult +from ..result_dataclasses import TaskResult +from ..types import ArrayLike +from .model import Encoder +from .task import DescriptiveDatasetStats, Task -@runtime_checkable -class Task(Protocol): - """ - A task is a specific evaluation task for a sentence embedding model. +class MTEBTaskModel(Encoder): + def __init__(self, mteb_model: Encoder, task: Task) -> None: + self.mteb_model = mteb_model + self.task = task - Attributes: - name: The name of the task. - main_score: The main score of the task. - description: A description of the task. - reference: A reference to the task. - version: The version of the task. - languages: The languages of the task. - domain: The domains of the task. Should be one of the categories listed on https://universaldependencies.org - - """ - - name: str - main_score: str - description: str - reference: str - version: str - languages: list[str] - domain: list[str] - - def evaluate(self, model: ModelInterface) -> TaskResult: - """ - Evaluates a Sentence Embedding Model on the task. - - Args: - model: A sentence embedding model. - - Returns: - A TaskResult object. - """ - ... - - def get_descriptive_stats(self) -> dict[str, Any]: - ... - - def name_to_path(self) -> str: - """ - Convert a name to a path. - """ - name = self.name.replace("/", "__").replace(" ", "_") - return name + def encode(self, texts: list[str], **kwargs: Any) -> ArrayLike: + return self.mteb_model.encode(texts, task=self.task, **kwargs) class MTEBTask(Task): @@ -67,7 +31,7 @@ def __init__(self, mteb_task: AbsTask) -> None: self.version = f"{mteb_version}" self.reference = mteb_desc["reference"] self.languages = mteb_desc["eval_langs"] - self.type = mteb_desc["type"] + self.task_type = mteb_desc["type"] self.domain = [] self._text_columns = ["text"] @@ -93,26 +57,27 @@ def load_data(self) -> DatasetDict: return DatasetDict(ds) - def get_descriptive_stats(self) -> dict[str, Any]: - ds = self.load_data() + def get_descriptive_stats(self) -> DescriptiveDatasetStats: + ds: DatasetDict = self.load_data() texts = [] for split in ds: for text_column in self._text_columns: texts += ds[split][text_column] - document_lengths = [len(text) for text in texts] + document_lengths = np.array([len(text) for text in texts]) - mean = np.mean(document_lengths) - std = np.std(document_lengths) - return { - "mean_document_length": mean, - "std_document_length": std, - "num_documents": len(document_lengths), - } + mean = float(np.mean(document_lengths)) + std = float(np.std(document_lengths)) + return DescriptiveDatasetStats( + mean_document_length=mean, + std_document_length=std, + num_documents=len(document_lengths), + ) - def evaluate(self, model: ModelInterface) -> TaskResult: + def evaluate(self, model: Encoder) -> TaskResult: split = self.mteb_task.description["eval_splits"][0] - scores = self.mteb_task.evaluate(model, split=split) + task_model = MTEBTaskModel(model, self) + scores = self.mteb_task.evaluate(task_model, split=split) if scores is None: raise ValueError("MTEBTask evaluation failed.") diff --git a/src/seb/interfaces/task.py b/src/seb/interfaces/task.py new file mode 100644 index 00000000..ab3d4fec --- /dev/null +++ b/src/seb/interfaces/task.py @@ -0,0 +1,151 @@ +from typing import Any, Callable, Optional, Protocol, runtime_checkable + +from pydantic import BaseModel + +from ..result_dataclasses import TaskResult +from ..types import ArrayLike, DescriptiveDatasetStats, Domain, Language, TaskType + + +@runtime_checkable +class Task(Protocol): + """ + A task is a specific evaluation task for a sentence embedding model. + + Attributes: + name: The name of the task. + main_score: The main score of the task. + description: A description of the task. + reference: A reference to the task. + version: The version of the task. + languages: The languages of the task. + domain: The domains of the task. Should be one of the categories listed on https://universaldependencies.org + """ + + name: str + main_score: str + description: str + reference: str + version: str + languages: list[Language] + domain: list[Domain] + task_type: TaskType + + def evaluate(self, model: "Encoder") -> TaskResult: + """ + Evaluates a Sentence Embedding Model on the task. + + Args: + model: A sentence embedding model. + + Returns: + A TaskResult object. + """ + ... + + def get_descriptive_stats(self) -> DescriptiveDatasetStats: + ... + + def name_to_path(self) -> str: + """ + Convert a name to a path. + """ + name = self.name.replace("/", "__").replace(" ", "_") + return name + + +@runtime_checkable +class Encoder(Protocol): + """ + Interface which all models must implement. + """ + + def encode( + self, + sentences: list[str], + task: Task, + batch_size: int = 32, + **kwargs: dict, + ) -> ArrayLike: + """Returns a list of embeddings for the given sentences. + Args: + sentences: List of sentences to encode + task: The task to encode for. This allows the model to encode differently for different tasks. Will always be given but does not need + to be used. + batch_size: Batch size for the encoding + kwargs: arguments to pass to the models encode method + + Returns: + Embeddings for the given documents + """ + ... + + +class ModelMeta(BaseModel): + name: str + description: Optional[str] = None + huggingface_name: Optional[str] = None + reference: Optional[str] = None + languages: list[str] = [] + open_source: bool = False + embedding_size: Optional[int] = None + + def get_path_name(self) -> str: + if self.huggingface_name is None: + return self._name_to_path(self.name) + return self._name_to_path(self.huggingface_name) + + @staticmethod + def _name_to_path(name: str) -> str: + return name.replace("/", "__").replace(" ", "_") + + def get_huggingface_url(self) -> str: + if self.huggingface_name is None: + raise ValueError("This model does not have an associated huggingface name.") + return f"https://huggingface.co/{self.huggingface_name}" + + +class EmbeddingModel(BaseModel): + """ + An embedding model as implemented in SEB. It notably dynamically loads models (such that models are not loaded when a cache is hit) + and includes metadata pertaining to the specific model. + """ + + meta: ModelMeta + loader: Callable[[], Encoder] + _model: Optional[Encoder] = None + + @property + def model(self) -> Encoder: + """ + Dynimically load the model. + """ + if self._model is None: + self._model = self.loader() + return self._model + + @property + def number_of_parameters(self) -> Optional[int]: + """ + Returns the number of parameters in the model. + """ + if hasattr(self.model, "num_parameters"): + return sum(p.numel() for p in self.model.parameters() if p.requires_grad) # type: ignore + return None + + def encode( + self, + sentences: list[str], + batch_size: int = 32, + **kwargs: Any, + ) -> ArrayLike: + """ + Returns a list of embeddings for the given sentences. + Args: + sentences: List of sentences to encode + batch_size: Batch size for the encoding + kwargs: arguments to pass to the models encode method + + Returns: + Embeddings for the given documents + """ + return self.model.encode(sentences, batch_size=batch_size, **kwargs) diff --git a/src/seb/seb_models/__init__.py b/src/seb/registered_models/__init__.py similarity index 100% rename from src/seb/seb_models/__init__.py rename to src/seb/registered_models/__init__.py diff --git a/src/seb/seb_models/cohere_models.py b/src/seb/registered_models/cohere_models.py similarity index 88% rename from src/seb/seb_models/cohere_models.py rename to src/seb/registered_models/cohere_models.py index 8105395e..b41e7a0c 100644 --- a/src/seb/seb_models/cohere_models.py +++ b/src/seb/registered_models/cohere_models.py @@ -9,13 +9,13 @@ import torch -from seb.model_interface import EmbeddingModel, ModelInterface, ModelMeta +import seb from seb.registries import models logger = logging.getLogger(__name__) -class CohereTextEmbeddingModel(ModelInterface): +class CohereTextEmbeddingModel(seb.Encoder): def __init__(self, model_name: str) -> None: self.model_name = model_name @@ -53,9 +53,9 @@ def encode( @models.register("embed-multilingual-v3.0") -def create_embed_multilingual_v3() -> EmbeddingModel: +def create_embed_multilingual_v3() -> seb.EmbeddingModel: model_name = "embed-multilingual-v3.0" - meta = ModelMeta( + meta = seb.ModelMeta( name=model_name, huggingface_name=None, reference="https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0", @@ -63,7 +63,7 @@ def create_embed_multilingual_v3() -> EmbeddingModel: open_source=False, embedding_size=1024, ) - return EmbeddingModel( + return seb.EmbeddingModel( loader=partial(CohereTextEmbeddingModel, model_name=model_name), meta=meta, ) diff --git a/src/seb/seb_models/e5_mistral.py b/src/seb/registered_models/e5_mistral.py similarity index 96% rename from src/seb/seb_models/e5_mistral.py rename to src/seb/registered_models/e5_mistral.py index 9d90f688..0698cce7 100644 --- a/src/seb/seb_models/e5_mistral.py +++ b/src/seb/registered_models/e5_mistral.py @@ -7,8 +7,10 @@ from torch import Tensor from transformers import AutoModel, AutoTokenizer, BatchEncoding -from seb import EmbeddingModel, ModelInterface, ModelMeta, models -from seb.model_interface import ArrayLike +from seb import models +from seb.interfaces.model import EmbeddingModel, Encoder, ModelMeta + +from ..types import ArrayLike T = TypeVar("T") @@ -22,7 +24,7 @@ def batched(iterable: Iterable[T], n: int) -> Iterable[tuple[T, ...]]: yield batch -class E5Mistral(ModelInterface): +class E5Mistral(Encoder): max_length = 4096 def __init__(self): diff --git a/src/seb/seb_models/e5_models.py b/src/seb/registered_models/e5_models.py similarity index 96% rename from src/seb/seb_models/e5_models.py rename to src/seb/registered_models/e5_models.py index 177a05df..e804e089 100644 --- a/src/seb/seb_models/e5_models.py +++ b/src/seb/registered_models/e5_models.py @@ -1,12 +1,13 @@ from functools import partial -from seb import EmbeddingModel, ModelInterface, ModelMeta, models -from seb.model_interface import ArrayLike +from seb import models +from ..interfaces.model import EmbeddingModel, Encoder, ModelMeta +from ..types import ArrayLike from .hf_models import get_sentence_transformer -class E5Wrapper(ModelInterface): +class E5Wrapper(Encoder): def __init__(self, model_name: str): self.model_name = model_name self.mdl = get_sentence_transformer(model_name) diff --git a/src/seb/seb_models/fairseq_models.py b/src/seb/registered_models/fairseq_models.py similarity index 97% rename from src/seb/seb_models/fairseq_models.py rename to src/seb/registered_models/fairseq_models.py index 98dc6fcc..af2d8f02 100644 --- a/src/seb/seb_models/fairseq_models.py +++ b/src/seb/registered_models/fairseq_models.py @@ -5,7 +5,7 @@ import torch -from seb.model_interface import EmbeddingModel, ModelInterface, ModelMeta +from seb.interfaces.model import EmbeddingModel, Encoder, ModelMeta from seb.registries import models @@ -18,7 +18,7 @@ def truncate_seq_length( # noqa: ANN201 return sequence_batch -class SonarTextToEmbeddingModelPipeline(torch.nn.Module, ModelInterface): +class SonarTextToEmbeddingModelPipeline(torch.nn.Module, Encoder): def __init__( self, encoder_name: str, diff --git a/src/seb/seb_models/hf_models.py b/src/seb/registered_models/hf_models.py similarity index 99% rename from src/seb/seb_models/hf_models.py rename to src/seb/registered_models/hf_models.py index 197f1d0e..9326d650 100644 --- a/src/seb/seb_models/hf_models.py +++ b/src/seb/registered_models/hf_models.py @@ -7,7 +7,7 @@ from sentence_transformers import SentenceTransformer -from seb.model_interface import EmbeddingModel, ModelMeta +from seb.interfaces.model import EmbeddingModel, ModelMeta from seb.registries import models diff --git a/src/seb/seb_models/openai_models.py b/src/seb/registered_models/openai_models.py similarity index 96% rename from src/seb/seb_models/openai_models.py rename to src/seb/registered_models/openai_models.py index d2f6bea5..66c4b849 100644 --- a/src/seb/seb_models/openai_models.py +++ b/src/seb/registered_models/openai_models.py @@ -9,13 +9,13 @@ import torch -from seb.model_interface import EmbeddingModel, ModelInterface, ModelMeta +from seb.interfaces.model import EmbeddingModel, Encoder, ModelMeta from seb.registries import models logger = logging.getLogger(__name__) -class OpenaiTextEmbeddingModel(ModelInterface): +class OpenaiTextEmbeddingModel(Encoder): def __init__(self, api_name: str, input_sentences: int = 64) -> None: self.api_name = api_name self.input_sentences = input_sentences diff --git a/src/seb/seb_tasks/__init__.py b/src/seb/registered_tasks/__init__.py similarity index 100% rename from src/seb/seb_tasks/__init__.py rename to src/seb/registered_tasks/__init__.py diff --git a/src/seb/seb_tasks/danish.py b/src/seb/registered_tasks/danish.py similarity index 95% rename from src/seb/seb_tasks/danish.py rename to src/seb/registered_tasks/danish.py index 348a1070..a8169577 100644 --- a/src/seb/seb_tasks/danish.py +++ b/src/seb/registered_tasks/danish.py @@ -1,5 +1,7 @@ +from seb.interfaces.task import Task from seb.registries import tasks -from seb.tasks_interface import MTEBTask, Task + +from ..interfaces.mteb_task import MTEBTask @tasks.register("Angry Tweets") diff --git a/src/seb/mteb_tasks.py b/src/seb/registered_tasks/mteb_tasks.py similarity index 91% rename from src/seb/mteb_tasks.py rename to src/seb/registered_tasks/mteb_tasks.py index ec535435..ad7ef0f7 100644 --- a/src/seb/mteb_tasks.py +++ b/src/seb/registered_tasks/mteb_tasks.py @@ -1,9 +1,11 @@ import random -from typing import Any +from typing import Any, TypeVar import datasets from mteb.abstasks import AbsTaskClassification, AbsTaskRetrieval, AbsTaskSTS +T = TypeVar("T") + class SweFaqRetrieval(AbsTaskRetrieval): @property @@ -101,13 +103,6 @@ def description(self) -> dict[str, Any]: } -def sattolo_cycle(items): - for i in range(len(items) - 1, 0, -1): - j = random.randint(0, i - 1) - items[i], items[j] = items[j], items[i] - return items - - class SwednSummarizationSTS(AbsTaskSTS): def load_data(self, **kwargs: dict): # noqa: ARG002 """ @@ -143,7 +138,7 @@ def dataset_transform(self) -> None: summaries = ds_split["sentence2"] articles = ds_split["sentence1"] scores = ds_split["score"] - mismatched_summaries = sattolo_cycle(summaries) + mismatched_summaries = self.sattolo_cycle(summaries) # add all the mismatched examples as negative examples mismatched_ds = datasets.Dataset.from_dict( @@ -174,3 +169,15 @@ def description(self) -> dict[str, Any]: "max_score": 1, "revision": "ef1661775d746e0844b299164773db733bdc0bf6", } + + @staticmethod + def sattolo_cycle(items: list[T]) -> list[T]: + """ + The Sattolo cycle is a simple algorithm for randomly shuffling an array in-place. + It ensures that the element i, will not be in the ith position of the result. + """ + + for i in range(len(items) - 1, 0, -1): + j = random.randint(0, i - 1) + items[i], items[j] = items[j], items[i] + return items diff --git a/src/seb/seb_tasks/multilingual.py b/src/seb/registered_tasks/multilingual.py similarity index 95% rename from src/seb/seb_tasks/multilingual.py rename to src/seb/registered_tasks/multilingual.py index fdeedb7a..c2464040 100644 --- a/src/seb/seb_tasks/multilingual.py +++ b/src/seb/registered_tasks/multilingual.py @@ -4,10 +4,11 @@ import numpy as np from datasets import DatasetDict, concatenate_datasets -from seb.model_interface import ModelInterface +from seb.interfaces.model import Encoder +from seb.interfaces.mteb_task import MTEBTask +from seb.interfaces.task import Task from seb.registries import tasks from seb.result_dataclasses import TaskResult -from seb.tasks_interface import MTEBTask, Task @tasks.register("Massive Intent") @@ -93,7 +94,7 @@ def get_descriptive_stats(self) -> dict[str, Any]: "num_documents": len(document_lengths), } - def evaluate(self, model: ModelInterface) -> TaskResult: + def evaluate(self, model: Encoder) -> TaskResult: scores = {} for lang, mteb_task in self.mteb_tasks.items(): mteb_task.load_data() diff --git a/src/seb/seb_tasks/norwegian.py b/src/seb/registered_tasks/norwegian.py similarity index 74% rename from src/seb/seb_tasks/norwegian.py rename to src/seb/registered_tasks/norwegian.py index 5b27b782..c0b167a8 100644 --- a/src/seb/seb_tasks/norwegian.py +++ b/src/seb/registered_tasks/norwegian.py @@ -1,6 +1,7 @@ -from seb.mteb_tasks import NorwegianParliamentClassification +from seb.interfaces.mteb_task import MTEBTask +from seb.interfaces.task import Task +from seb.registered_tasks.mteb_tasks import NorwegianParliamentClassification from seb.registries import tasks -from seb.tasks_interface import MTEBTask, Task @tasks.register("NoReC") diff --git a/src/seb/seb_tasks/swedish.py b/src/seb/registered_tasks/swedish.py similarity index 82% rename from src/seb/seb_tasks/swedish.py rename to src/seb/registered_tasks/swedish.py index 5e6c1f41..766e5a1c 100644 --- a/src/seb/seb_tasks/swedish.py +++ b/src/seb/registered_tasks/swedish.py @@ -1,5 +1,6 @@ +from seb.interfaces.mteb_task import MTEBTask +from seb.interfaces.task import Task from seb.registries import tasks -from seb.tasks_interface import MTEBTask, Task @tasks.register("SweReC") @@ -24,7 +25,7 @@ def create_dalaj() -> Task: @tasks.register("SweFAQ") def create_swefaq() -> Task: - from seb.mteb_tasks import SweFaqRetrieval + from seb.registered_tasks.mteb_tasks import SweFaqRetrieval task = MTEBTask(SweFaqRetrieval()) task.name = "SweFAQ" @@ -37,7 +38,7 @@ def create_swefaq() -> Task: # temporarily disabled - will be added back in the future (along with the new datasets) # @tasks.register("Swedn") def create_swedn() -> Task: - from seb.mteb_tasks import SwednSummarizationSTS + from seb.registered_tasks.mteb_tasks import SwednSummarizationSTS task = MTEBTask(SwednSummarizationSTS()) task.name = "Swedn" diff --git a/src/seb/registries.py b/src/seb/registries.py index 012681ed..35a2e7aa 100644 --- a/src/seb/registries.py +++ b/src/seb/registries.py @@ -1,7 +1,7 @@ import catalogue -from .model_interface import EmbeddingModel -from .tasks_interface import Task +from .interfaces.model import EmbeddingModel +from .interfaces.task import Task models = catalogue.create("seb", "models") tasks = catalogue.create("seb", "tasks") diff --git a/src/seb/result_dataclasses.py b/src/seb/result_dataclasses.py index a626e67d..be60602d 100644 --- a/src/seb/result_dataclasses.py +++ b/src/seb/result_dataclasses.py @@ -7,7 +7,7 @@ import numpy as np from pydantic import BaseModel -from .model_interface import ModelMeta +from .interfaces.model import ModelMeta class TaskResult(BaseModel): diff --git a/src/seb/types.py b/src/seb/types.py new file mode 100644 index 00000000..97b088e7 --- /dev/null +++ b/src/seb/types.py @@ -0,0 +1,35 @@ +from typing import Literal, TypedDict, Union + +from numpy import ndarray +from torch import Tensor + +ArrayLike = Union[ndarray, Tensor] + + +Domain = Literal[ + "social", + "peotry", + "wiki", + "fiction", + "non-fiction", + "web", + "legal", + "news", + "academic", + "spoken", + "reviews", + "blog", + "medical", + "government", + "bible", +] + +Language = Literal["da", "nb", "nn", "sv"] + +TaskType = Literal["Classification", "Retrieval", "STS", "BitextMining", "Clustering"] + + +class DescriptiveDatasetStats(TypedDict): + mean_document_length: float + std_document_length: float + num_documents: int diff --git a/tests/cli/benchmark_cli_code_inject.py b/tests/cli/benchmark_cli_code_inject.py index 72f72601..7e4107e9 100644 --- a/tests/cli/benchmark_cli_code_inject.py +++ b/tests/cli/benchmark_cli_code_inject.py @@ -16,7 +16,7 @@ class DummyTask(seb.Task): languages = [] # noqa: RUF012 domain = [] # noqa: RUF012 - def evaluate(self, model: seb.ModelInterface) -> seb.TaskResult: + def evaluate(self, model: seb.Encoder) -> seb.TaskResult: model.encode(["a test sentence"]) return seb.TaskResult( @@ -54,7 +54,7 @@ def encode( def load_test_model() -> TestEncoder: return TestEncoder() - assert isinstance(TestEncoder, seb.ModelInterface) + assert isinstance(TestEncoder, seb.Encoder) return seb.EmbeddingModel( meta=seb.ModelMeta(name="test_model", embedding_size=100), diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index e2f7bf0b..a5d85183 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -2,10 +2,10 @@ from pathlib import Path from typing import Union +import numpy as np import pytest import seb from seb.cli import cli, run_benchmark_cli -import numpy as np test_dir = Path(__file__).parent diff --git a/tests/dummy_model.py b/tests/dummy_model.py index c56560f6..2e2c3a3a 100644 --- a/tests/dummy_model.py +++ b/tests/dummy_model.py @@ -5,7 +5,7 @@ @models.register("test_model") def create_test_model() -> seb.EmbeddingModel: - class TestEncoder: + class TestEncoder(seb.Encoder): def encode( self, sentences: list[str], @@ -18,9 +18,7 @@ def encode( def load_test_model() -> TestEncoder: return TestEncoder() - assert isinstance(TestEncoder, seb.ModelInterface) - return seb.EmbeddingModel( meta=seb.ModelMeta(name="test_model", embedding_size=100), loader=load_test_model, - ) # type: ignore + ) diff --git a/tests/dummy_task.py b/tests/dummy_task.py index 6286bf5a..8fbf52bd 100644 --- a/tests/dummy_task.py +++ b/tests/dummy_task.py @@ -1,41 +1,42 @@ from datetime import datetime +from typing import Any import seb -from seb.registries import tasks -def create_test_task() -> seb.Task: - class DummyTask(seb.Task): - name = "test task" - main_score = "a_metric" - description = "NA" - reference = "NA" - version = "NA" - languages = [] # noqa: RUF012 - - def evaluate(self, model: seb.ModelInterface) -> seb.TaskResult: # noqa: ARG002 - return seb.TaskResult( - task_name="test task", - task_description="NA", - task_version="NA", - time_of_run=datetime.now(), - scores={"en": {"a_metric": 1.0}}, - main_score="a_metric", - ) +class TestTask(seb.Task): + name = "test task" + main_score = "a_metric" + description = "NA" + reference = "NA" + version = "NA" + languages = [] # noqa: RUF012 + domain = [] # noqa: RUF012 + task_type = "Classification" + + def evaluate(self, model: seb.Encoder) -> seb.TaskResult: # noqa: ARG002 + return seb.TaskResult( + task_name="test task", + task_description="NA", + task_version="NA", + time_of_run=datetime.now(), + scores={"en": {"a_metric": 1.0}}, + main_score="a_metric", + ) - return DummyTask() + def get_descriptive_stats(self) -> dict[str, Any]: + return {} + + +def create_test_task() -> seb.Task: + return TestTask() def create_test_encode_task() -> seb.Task: - class DummyTask(seb.Task): + class TestTaskWithEncode(TestTask): name = "test encode task" - main_score = "a_metric" - description = "NA" - reference = "NA" - version = "NA" - languages = [] # noqa: RUF012 - def evaluate(self, model: seb.ModelInterface) -> seb.TaskResult: + def evaluate(self, model: seb.Encoder) -> seb.TaskResult: model.encode(["a test sentence"]) return seb.TaskResult( @@ -47,7 +48,7 @@ def evaluate(self, model: seb.ModelInterface) -> seb.TaskResult: main_score="a_metric", ) - return DummyTask() + return TestTaskWithEncode() def create_test_raise_error_task() -> seb.Task: @@ -55,15 +56,10 @@ def create_test_raise_error_task() -> seb.Task: Note this task is not registered as it will cause errrors in other tests. """ - class DummyTask(seb.Task): + class TestTaskWithError(TestTask): name = "test raise error task" - main_score = "a_metric" - description = "NA" - reference = "NA" - version = "NA" - languages = [] # noqa: RUF012 - def evaluate(self, model: seb.ModelInterface) -> seb.TaskResult: # noqa ARG002 + def evaluate(self, model: seb.Encoder) -> seb.TaskResult: # noqa ARG002 raise ValueError("Test raised error. This error should be handled.") - return DummyTask() + return TestTaskWithError() diff --git a/tests/test_task_dependent_encode.py b/tests/test_task_dependent_encode.py new file mode 100644 index 00000000..5d3dc3e9 --- /dev/null +++ b/tests/test_task_dependent_encode.py @@ -0,0 +1,86 @@ +from datetime import datetime + +import numpy as np +import seb + +from .dummy_task import TestTask + + +def create_test_model_with_task_dependent_encode() -> seb.EmbeddingModel: + class TestEncoder(seb.Encoder): + def encode( + self, + sentences: list[str], + batch_size: int, # noqa: ARG002 + task: seb.Task, + ) -> np.ndarray: + if task.task_type == "SNS": + return np.array([np.zeros(100) for _ in sentences]) + return np.array([np.ones(100) for _ in sentences]) + + def load_test_model() -> TestEncoder: + return TestEncoder() + + return seb.EmbeddingModel( + meta=seb.ModelMeta( + name="test_model_with_task_dependent_encode", embedding_size=100 + ), + loader=load_test_model, + ) + + +def create_all_is_0_task() -> seb.Task: + class TestTaskAllEmbeddingIsOne(TestTask): + name = "embeddings is one task" + task_type: str = "SNS" + + def evaluate(self, model: seb.Encoder) -> seb.TaskResult: + out = model.encode(["a test sentence"]) + assert np.all(out == 0) + + return seb.TaskResult( + task_name=self.name, + task_description="NA", + task_version="NA", + time_of_run=datetime.now(), + scores={"en": {"a_metric": 1.0}}, + main_score="a_metric", + ) + + return TestTaskAllEmbeddingIsOne() + + +def create_all_is_1_task() -> seb.Task: + class TestTaskAllEmbeddingIsZero(TestTask): + name = "all embeddings is 0 task" + task_type = "Classification" + + def evaluate(self, model: seb.Encoder) -> seb.TaskResult: + out = model.encode(["a test sentence"], task=self) + assert np.all(out == 0) + + return seb.TaskResult( + task_name=self.name, + task_description="NA", + task_version="NA", + time_of_run=datetime.now(), + scores={"en": {"a_metric": 1.0}}, + main_score="a_metric", + ) + + return TestTaskAllEmbeddingIsZero() + + +def test_task_dependent_encode(): + model = create_test_model_with_task_dependent_encode() + + tasks = [ + create_all_is_0_task(), + create_all_is_1_task(), + ] + + benchmark = seb.Benchmark(tasks=tasks) + result = benchmark.evaluate_model(model) + assert ( + result.get_main_score() == 1 + ), "both datasets should have score of 1 if they run successfully" diff --git a/tests/test_tasks.py b/tests/test_tasks.py index 6a83c466..5eadacd9 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -67,7 +67,7 @@ def test_all_tasks(task_name: str, model_name: str): assert isinstance(task, seb.Task) assert isinstance(model, seb.EmbeddingModel) - assert isinstance(model.model, seb.ModelInterface) + assert isinstance(model.model, seb.Encoder) task_result = task.evaluate(model) assert isinstance(task_result, seb.TaskResult)