feat: Added speed task for estimating the speed of the embedding models
KennethEnevoldsen committed Jan 19, 2024
1 parent da32c0e commit 25caacc
Showing 11 changed files with 272 additions and 11 deletions.
5 changes: 3 additions & 2 deletions src/seb/cli/table.py
@@ -5,10 +5,11 @@
 from rich.table import Table
 
 import seb
+from seb.types import Language
 
 
-def get_main_score(task: seb.TaskResult, langs: Optional[list[str]]) -> float:
-    if langs is None:
+def get_main_score(task: seb.TaskResult, langs: Optional[list[Language]]) -> float:
+    if langs is None:  # noqa
         _langs = task.languages
     else:
         _langs = set(langs) & set(task.languages)
37 changes: 37 additions & 0 deletions src/seb/full_benchmark.py
@@ -3,14 +3,18 @@
 """
 
 
+import logging
 from pathlib import Path
 from typing import Optional
 
 from .benchmark import Benchmark
+from .interfaces.model import EmbeddingModel
+from .registered_tasks.speed import CPUSpeedTask, GPUSpeedTask
 from .registries import get_all_models
 from .result_dataclasses import BenchmarkResults
 
+logger = logging.getLogger(__name__)
 
 BENCHMARKS = {
     "Mainland Scandinavian": ["da", "sv", "nn", "nb"],
     "Danish": ["da"],
Expand Down Expand Up @@ -44,3 +48,36 @@ def run_benchmark(
results[subset] = bm_results

return results


def run_speed_benchmark(
use_cache: bool = True,
run_models: bool = True,
raise_errors: bool = True,
cache_dir: Optional[Path] = None,
) -> dict[str, list[BenchmarkResults]]:
"""
Run the speed benchmark.
"""
models: list[EmbeddingModel] = get_all_models()
tasks = [CPUSpeedTask(), GPUSpeedTask()] # type: ignore

if use_cache:
logger.warn(
"Running the speed benchmark with use_cache=True will load speed results from the cache, this might lead to incomparable results."
)

results = {}
for subset, langs in BENCHMARKS.items():
benchmark = Benchmark(languages=langs)
bm_results = benchmark.evaluate_models(
models=models,
use_cache=use_cache,
run_model=run_models,
raise_errors=raise_errors,
cache_dir=cache_dir,
)

results[subset] = bm_results

return results
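
For reference, a minimal usage sketch (not part of the commit) of the new entry point, assuming the package layout shown in this diff; the cache directory path is purely illustrative:

from pathlib import Path

from seb.full_benchmark import run_speed_benchmark

# Run with use_cache=False so timings are measured on this machine rather than
# read back from a previous run, as the warning above cautions.
speed_results = run_speed_benchmark(
    use_cache=False,
    run_models=True,
    raise_errors=False,
    cache_dir=Path("seb_speed_cache"),
)

for subset, bm_results in speed_results.items():
    print(subset, bm_results)
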
1 change: 1 addition & 0 deletions src/seb/interfaces/model.py
@@ -25,6 +25,7 @@ def encode(
         **kwargs: Any,
     ) -> ArrayLike:
         """Returns a list of embeddings for the given sentences.
+
         Args:
             sentences: List of sentences to encode
             task: The task to encode for. This allows the model to encode differently for different tasks. Will always be given but does not need
6 changes: 3 additions & 3 deletions src/seb/interfaces/mteb_task.py
@@ -1,13 +1,13 @@
 from datetime import datetime
-from typing import Any
+from typing import Any, Union
 
 import numpy as np
 from datasets import DatasetDict, concatenate_datasets
 from mteb import AbsTask
 from mteb import __version__ as mteb_version
 
 from ..result_dataclasses import TaskResult
-from ..types import ArrayLike
+from ..types import ArrayLike, Language
 from .model import Encoder
 from .task import DescriptiveDatasetStats, Task
 
@@ -88,7 +88,7 @@ def evaluate(self, model: Encoder) -> TaskResult:
         scores = scores.get(split, scores)
         score_is_nested = isinstance(scores[next(iter(scores.keys()))], dict)
         if not score_is_nested:
-            _scores = {lang: scores for lang in self.languages}
+            _scores: dict[str, dict[str, Union[float, str]]] = {lang: scores for lang in self.languages}
             scores = _scores
 
         task_result = TaskResult(
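
As an aside, a small illustration (not from the diff) of the two score shapes this branch handles: MTEB may return either a flat metric dict or one already keyed per language, and the newly annotated _scores wraps the flat case under every language of the task:

from typing import Union

# Hypothetical flat scores as returned for a monolingual MTEB task.
flat_scores: dict[str, Union[float, str]] = {"accuracy": 0.71, "main_score": 0.71}
languages = ["da", "nb"]

# The non-nested case is duplicated under each task language, matching the
# dict[str, dict[str, Union[float, str]]] annotation added above.
nested = {lang: flat_scores for lang in languages}
assert nested["da"]["accuracy"] == 0.71
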
2 changes: 2 additions & 0 deletions src/seb/interfaces/task.py
@@ -1,5 +1,7 @@
 from typing import Protocol, runtime_checkable
 
+from attr import dataclass
+
 from ..result_dataclasses import TaskResult
 from ..types import DescriptiveDatasetStats, Domain, Language, TaskType
 from .model import Encoder
103 changes: 103 additions & 0 deletions src/seb/registered_tasks/speed.py
@@ -0,0 +1,103 @@
+import logging
+import platform
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, Union
+
+import numpy as np
+import psutil
+import torch
+
+from seb.interfaces.model import EmbeddingModel
+from seb.interfaces.task import Task
+from seb.result_dataclasses import TaskResult
+from seb.types import DescriptiveDatasetStats, languages_in_seb
+
+logger = logging.getLogger(__name__)
+
+
+class CPUSpeedTask(Task):
+    reference = "NA"
+    version = "0.0.1"
+    task_type = "Speed"
+    languages = languages_in_seb
+    main_score = "Inference speed (seconds)"
+    domain = ["fiction"]  # noqa
+    name = "Speed (CPU)"
+    description = "Time taken to encode the text 'The Ugly Duckling' split by paragraphs on a CPU."
+    device = "cpu"
+    _dataset: Optional[list[str]] = None
+
+    def load_dataset(self) -> list[str]:
+        file_path = Path(__file__).parent / "the_ugly_duckling.txt"
+        with file_path.open("r") as f:
+            text = f.read()
+        return text.split("\n\n")
+
+    @property
+    def dataset(self) -> list[str]:
+        if self._dataset is None:
+            self._dataset = self.load_dataset()
+        return self._dataset
+
+    def get_descriptive_stats(self) -> DescriptiveDatasetStats:
+        dataset = self.load_dataset()
+        lengths = np.array([len(x) for x in dataset])
+        return DescriptiveDatasetStats(
+            mean_document_length=float(np.mean(lengths)), std_document_length=float(np.std(lengths)), num_documents=len(dataset)
+        )
+
+    def get_time_taken(self, model: EmbeddingModel) -> float:
+        dataset = self.load_dataset()
+        start = time.time()
+        with torch.no_grad():
+            model.encode(dataset, batch_size=1, device=self.device, task=self)
+        time_taken = time.time() - start
+        return time_taken
+
+    def evaluate(self, model: EmbeddingModel) -> TaskResult:
+        model.loader()  # ensure model is loaded
+
+        has_to_method = callable(getattr(model._model, "to", None))
+        if has_to_method:
+            model = model.to(self.device)  # type: ignore
+
+        run_inference = not (self.device == "cuda" and not has_to_method)
+        if run_inference:
+            time_taken = self.get_time_taken(model)
+        else:
+            logger.warning(f"Could not run inference on {model.meta.name} on {self.device} as it does not have a 'to' method. Skipping.")
+            time_taken = np.nan
+
+        scores: dict[str, Union[str, float]] = {self.main_score: time_taken, **self.get_system_info()}
+
+        return TaskResult(
+            task_name=self.name,
+            task_description=self.description,
+            task_version=self.version,
+            scores={lang: scores for lang in self.languages},
+            time_of_run=datetime.now(),
+            main_score=self.main_score,
+        )
+
+    def get_system_info(self) -> dict[str, str]:
+        """
+        Returns a dictionary with system information.
+        """
+        info = {}
+        info["platform"] = platform.system()
+        info["platform-release"] = platform.release()
+        info["platform-version"] = platform.version()
+        info["architecture"] = platform.machine()
+        info["processor"] = platform.processor()
+        info["ram"] = str(round(psutil.virtual_memory().total / (1024.0**3))) + " GB"
+        info["Physical cores"] = str(psutil.cpu_count(logical=False))
+        info["Total cores"] = str(psutil.cpu_count(logical=True))
+        return info
+
+
+class GPUSpeedTask(CPUSpeedTask):
+    name = "Speed (GPU)"
+    description = "Time taken to encode the text 'The Ugly Duckling' split by paragraphs on a GPU."
+    device: str = "cuda"
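
Finally, a hedged sketch of how one of the new tasks might be exercised directly, using only names visible in this diff (get_all_models, CPUSpeedTask, and the TaskResult fields set in evaluate); exact import paths may differ in the installed package:

from seb.registered_tasks.speed import CPUSpeedTask
from seb.registries import get_all_models

# Pick any registered model and time it on the CPU speed task; the measured
# value lands in the scores under "Inference speed (seconds)".
model = get_all_models()[0]
task = CPUSpeedTask()

result = task.evaluate(model)
print(result.task_name, result.scores)
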