Skip to content

Commit

Permalink
minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Jul 18, 2024
1 parent 8f523f1 commit d14d6a9
Show file tree
Hide file tree
Showing 21 changed files with 26 additions and 8 deletions.
16 changes: 16 additions & 0 deletions src/seb/cache/BAAI__bge-m3/Angry_Tweets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"task_name": "Angry Tweets",
"task_description": "A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets",
"task_version": "1.1.1",
"time_of_run": "2024-07-18T14:17:30.210659",
"scores": {
"da": {
"accuracy": 0.5744030563514804,
"f1": 0.5639609605319712,
"accuracy_stderr": 0.0232317583970707,
"f1_stderr": 0.02040731181518541,
"main_score": 0.5744030563514804
}
},
"main_score": "accuracy"
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 0 additions & 1 deletion src/seb/cache/bge-m3/Angry_Tweets.json

This file was deleted.

1 change: 1 addition & 0 deletions src/seb/registered_models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from .openai_models import *
from .translate_e5_models import *
from .voyage_models import *
from .bge_models import *
13 changes: 8 additions & 5 deletions src/seb/registered_models/bge_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ def encode( # type: ignore
if "task" in kwargs:
kwargs.pop("task")

return np.asarray(self.mdl.encode(sentences, batch_size=batch_size, **kwargs))
if "convert_to_tensor" in kwargs:
kwargs.pop("convert_to_tensor")

return np.asarray(self.mdl.encode(sentences, batch_size=batch_size, convert_to_numpy=True, **kwargs))

def encode_queries(self, queries: list[str], batch_size: int = 32, **kwargs: Any) -> np.ndarray:
if "task" in kwargs:
Expand Down Expand Up @@ -61,17 +64,17 @@ def encode_corpus(
return emb # type: ignore


@models.register("BAAI/bge-m3")
@models.register("bge-m3")
def create_bge_m3() -> SebModel:
hf_name = "BAAI/bge-m3"
meta = ModelMeta(
name=hf_name,
name="bge-m3",
huggingface_name=hf_name,
reference=f"https://huggingface.co/{hf_name}",
languages=[],
open_source=False,
open_source=True,
embedding_size=1024,
architecture="API",
architecture="XLM-R",
release_date=date(2024, 5, 28),
)
return SebModel(
Expand Down
3 changes: 1 addition & 2 deletions src/seb/registered_models/sentence_transformer_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,9 +494,8 @@ def create_use_cmlm_multilingual() -> SebModel:
name=hf_name.split("/")[-1],
huggingface_name=hf_name,
reference=f"https://huggingface.co/{hf_name}",
languages=["da"],
open_source=True,
embedding_size=768,
embedding_size=768,
architecture="BERT",
release_date=date(2022, 4, 14),
)
Expand Down

0 comments on commit d14d6a9

Please sign in to comment.