Skip to content

Commit

Permalink
fix: run on ucloud
Browse files Browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Sep 25, 2023
1 parent ac8551f commit aebaa4e
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ error_logs.txt
# tmp tests files
tests/tmpfiles/*
results.zip
seb_cache/
2 changes: 1 addition & 1 deletion docs/run_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
python run_benchmark.py --data-wrapper-api-token <token>
"""

import argparse
from typing import List

Expand Down Expand Up @@ -68,6 +67,7 @@ def benchmark_result_to_row(
task_names = [t.task_name for t in sorted_tasks]
scores = [get_main_score(t, langs) for t in sorted_tasks] # type: ignore


df = pd.DataFrame([scores], columns=task_names, index=[mdl_name])
df["Average"] = np.mean(scores) # type: ignore
return df
Expand Down
4 changes: 2 additions & 2 deletions src/seb/full_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
}


def run_benchmark(use_cache: bool = True) -> dict[str, List[BenchmarkResults]]:
def run_benchmark(use_cache: bool = True, raise_errors: bool=True) -> dict[str, List[BenchmarkResults]]:
"""
Run the full SEB benchmark.
"""
Expand All @@ -28,7 +28,7 @@ def run_benchmark(use_cache: bool = True) -> dict[str, List[BenchmarkResults]]:
for subset, langs in BENCHMARKS.items():
benchmark = Benchmark(languages=langs)
bm_results = benchmark.evaluate_models(
models=models, use_cache=use_cache, raise_errors=False
models=models, use_cache=use_cache, raise_errors=raise_errors
)

results[subset] = bm_results
Expand Down
1 change: 1 addition & 0 deletions src/seb/seb_models/fairseq_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def encode(
self,
input: Union[Path, Sequence[str]],
batch_size: int,
**kwargs,
) -> torch.Tensor:
from fairseq2.data import Collater # type: ignore
from fairseq2.data.data_pipeline import read_sequence # type: ignore
Expand Down
36 changes: 36 additions & 0 deletions tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,42 @@ def install(
c.run(install_cmd)


@task
def install_ucloud(
    c: Context,
    pip_args: str = "",
    msg: bool = True,
    venv_path: Optional[str] = None,
):
    """Install the project in editable mode and prepare a UCloud machine.

    Tested using application: Coder Python 1.80.2.

    Steps, in order:
      1. ``pip install -e`` the project with the dev/tests/docs/sonar extras
         (inside the virtual environment when ``venv_path`` is usable).
      2. Append an ``export SEB_CACHE_DIR=...`` line to ``~/.bashrc`` pointing
         at a ``seb_cache`` directory next to this file.
      3. Install ``libsndfile1`` through apt (needed by FairSeq2).

    Args:
        c: Invoke context used to run the shell commands.
        pip_args: Extra arguments appended verbatim to the ``pip install``
            command.
        msg: Whether to print the "Installing project" header.
        venv_path: Path to a virtual environment to activate before
            installing. Only honoured when ``NOT_WINDOWS`` is truthy, since
            activation uses ``source <venv>/bin/activate``.
    """
    if msg:
        echo_header(f"{msg_type.DOING} Installing project")

    extras = ".[dev,tests,docs,sonar]"
    install_cmd = f"pip install -e {extras} {pip_args}"

    if venv_path is not None and NOT_WINDOWS:
        with c.prefix(f"source {venv_path}/bin/activate"):
            c.run(
                "pip install git+https://github.com/embeddings-benchmark/mteb"
            )  # TODO: remove after merge of https://github.com/embeddings-benchmark/mteb/pull/128
            c.run(install_cmd)
        # No early return here: the cache-dir export and the apt dependency
        # below are required on UCloud regardless of whether a venv is used.
    else:
        c.run(install_cmd)

    cache_dir = (Path(__file__).parent / "seb_cache").resolve()
    print("--- Settting SEB Cache Dir. to: ---")
    print(f"\t{cache_dir}")
    # NOTE(review): every invocation appends another export line to ~/.bashrc;
    # harmless (last one wins) but not idempotent.
    c.run(f'echo "export SEB_CACHE_DIR={cache_dir}" >> ~/.bashrc')

    print("-- Installing required dependencies for FairSeq2 using apt --")
    c.run("sudo apt update")
    c.run("sudo apt install libsndfile1 -y")


def get_python_path(preferred_version: str) -> Optional[str]:
"""Get path to python executable."""
preferred_version_path = shutil.which(f"python{preferred_version}")
Expand Down

0 comments on commit aebaa4e

Please sign in to comment.