Skip to content

Commit

Permalink
support embedding normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
SeanLee97 committed Jul 18, 2024
1 parent 191ca1b commit d584d24
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
6 changes: 5 additions & 1 deletion angle_emb/angle.py
Original file line number Diff line number Diff line change
Expand Up @@ -1550,7 +1550,8 @@ def encode(self,
embedding_start: int = 0,
embedding_size: Optional[int] = None,
device: Optional[Any] = None,
prompt: Optional[str] = None):
prompt: Optional[str] = None,
normalize_embedding: bool = False):
"""
encode texts.
Expand All @@ -1563,6 +1564,7 @@ def encode(self,
The embeddings from embedding_start to embedding_start+embedding_size will be returned.
:param device: Optional[Any]. Default None.
:param prompt: Optional[str]. Default None.
:param normalize_embedding: bool. Default False.
"""
if layer_index != -1 and self.full_backbone is None:
self.full_backbone = copy.deepcopy(self.backbone)
Expand Down Expand Up @@ -1605,6 +1607,8 @@ def encode(self,
layer_index=layer_index,
embedding_start=embedding_start,
embedding_size=embedding_size)
if normalize_embedding:
output = nn.functional.normalize(output, p=2, dim=-1)
if to_numpy:
return output.float().detach().cpu().numpy()
return output
Expand Down
9 changes: 9 additions & 0 deletions tests/test_loadding.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,12 @@ def test_2dmse_loadding():
assert isinstance(vecs, np.ndarray)
vecs = angle.encode(['hello world', 'hi there👋'], layer_index=20, embedding_size=512)
assert isinstance(vecs, np.ndarray)


def test_normalize_embedding():
    """Check that ``encode(..., normalize_embedding=True)`` yields unit-norm vectors.

    The type check alone would also pass if normalization were skipped, so the
    L2 norm along the last axis is verified explicitly.
    """
    import numpy as np
    from angle_emb import AnglE

    angle = AnglE.from_pretrained('WhereIsAI/UAE-Large-V1')
    vecs = angle.encode('hello world', normalize_embedding=True)
    assert isinstance(vecs, np.ndarray)
    # encode() normalizes with p=2 over dim=-1, so every vector along the last
    # axis should have length ~1; the tolerance absorbs float rounding.
    norms = np.linalg.norm(vecs, axis=-1)
    assert np.allclose(norms, 1.0, atol=1e-3)

0 comments on commit d584d24

Please sign in to comment.