fix: run on ucloud

KennethEnevoldsen · Sep 25, 2023 · aebaa4e · aebaa4e
1 parent ac8551f
commit aebaa4e
Show file tree

Hide file tree

Showing 5 changed files with 41 additions and 3 deletions.
diff --git a/.gitignore b/.gitignore
@@ -27,3 +27,4 @@ error_logs.txt
 # tmp tests files
 tests/tmpfiles/*
 results.zip
+seb_cache/
diff --git a/docs/run_benchmark.py b/docs/run_benchmark.py
@@ -5,7 +5,6 @@
     
     python run_benchmark.py --data-wrapper-api-token <token>
 """
-
 import argparse
 from typing import List
 
@@ -68,6 +67,7 @@ def benchmark_result_to_row(
     task_names = [t.task_name for t in sorted_tasks]
     scores = [get_main_score(t, langs) for t in sorted_tasks]  # type: ignore
 
+
     df = pd.DataFrame([scores], columns=task_names, index=[mdl_name])
     df["Average"] = np.mean(scores)  # type: ignore
     return df

diff --git a/src/seb/full_benchmark.py b/src/seb/full_benchmark.py
@@ -18,7 +18,7 @@
 }
 
 
-def run_benchmark(use_cache: bool = True) -> dict[str, List[BenchmarkResults]]:
+def run_benchmark(use_cache: bool = True, raise_errors: bool=True) -> dict[str, List[BenchmarkResults]]:
     """
     Run the full SEB benchmark.
     """
@@ -28,7 +28,7 @@ def run_benchmark(use_cache: bool = True) -> dict[str, List[BenchmarkResults]]:
     for subset, langs in BENCHMARKS.items():
         benchmark = Benchmark(languages=langs)
         bm_results = benchmark.evaluate_models(
-            models=models, use_cache=use_cache, raise_errors=False
+            models=models, use_cache=use_cache, raise_errors=raise_errors
         )
 
         results[subset] = bm_results

diff --git a/src/seb/seb_models/fairseq_models.py b/src/seb/seb_models/fairseq_models.py
@@ -57,6 +57,7 @@ def encode(
         self,
         input: Union[Path, Sequence[str]],
         batch_size: int,
+        **kwargs,
     ) -> torch.Tensor:
         from fairseq2.data import Collater  # type: ignore
         from fairseq2.data.data_pipeline import read_sequence  # type: ignore

diff --git a/tasks.py b/tasks.py
@@ -283,6 +283,42 @@ def install(
     c.run(install_cmd)
 
 
+@task
+def install_ucloud(
+    c: Context,
+    pip_args: str = "",
+    msg: bool = True,
+    venv_path: Optional[str] = None,
+):
+    """Install the project in editable mode with pip and prepare UCloud.
+
+    Tested with the UCloud app "Coder Python 1.80.2". `pip_args` is appended to
+    the editable install command, `msg` toggles the header, and `venv_path` (if
+    set and NOT_WINDOWS) is activated for the pip steps. The SEB cache export
+    and the apt packages are set up whether or not a venv is used.
+    """
+    if msg:
+        echo_header(f"{msg_type.DOING} Installing project")
+    install_cmd = f"pip install -e .[dev,tests,docs,sonar] {pip_args}"
+    if venv_path is not None and NOT_WINDOWS:
+        with c.prefix(f"source {venv_path}/bin/activate"):
+            # TODO: remove after merge of https://github.com/embeddings-benchmark/mteb/pull/128
+            c.run("pip install git+https://github.com/embeddings-benchmark/mteb")
+            c.run(install_cmd)
+    else:
+        c.run(install_cmd)
+
+    # Always runs: an early return used to skip this setup when venv_path was set.
+    cache_dir = (Path(__file__).parent / "seb_cache").resolve()
+    print("--- Setting SEB Cache Dir. to: ---")
+    print(f"\t{cache_dir}")
+    c.run(f'echo "export SEB_CACHE_DIR={cache_dir}" >> ~/.bashrc')
+
+    print("-- Installing required dependencies for FairSeq2 using apt --")
+    c.run("sudo apt update")
+    c.run("sudo apt install libsndfile1 -y")
+
+
 def get_python_path(preferred_version: str) -> Optional[str]:
     """Get path to python executable."""
     preferred_version_path = shutil.which(f"python{preferred_version}")