Skip to content

Commit

Permalink
Exclude strings from v3.2+ source vector checks (explosion#9697)
Browse files Browse the repository at this point in the history
Exclude strings from `Vectors.to_bytes()` comparisons for v3.2+ `Vectors`,
which now include the string store, so that the source vector comparison
only compares the vectors and not the strings.
  • Loading branch information
adrianeboyd authored and polm committed Nov 28, 2021
1 parent 8daf027 commit a1b6c1a
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
7 changes: 5 additions & 2 deletions spacy/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,8 @@ def create_pipe_from_source(
if (
self.vocab.vectors.shape != source.vocab.vectors.shape
or self.vocab.vectors.key2row != source.vocab.vectors.key2row
or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
or self.vocab.vectors.to_bytes(exclude=["strings"])
!= source.vocab.vectors.to_bytes(exclude=["strings"])
):
warnings.warn(Warnings.W113.format(name=source_name))
if source_name not in source.component_names:
Expand Down Expand Up @@ -1822,7 +1823,9 @@ def from_config(
)
if model not in source_nlp_vectors_hashes:
source_nlp_vectors_hashes[model] = hash(
source_nlps[model].vocab.vectors.to_bytes()
source_nlps[model].vocab.vectors.to_bytes(
exclude=["strings"]
)
)
if "_sourced_vectors_hashes" not in nlp.meta:
nlp.meta["_sourced_vectors_hashes"] = {}
Expand Down
2 changes: 1 addition & 1 deletion spacy/training/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def init_vocab(
logger.info(f"Added vectors: {vectors}")
# warn if source model vectors are not identical
sourced_vectors_hashes = nlp.meta.pop("_sourced_vectors_hashes", {})
vectors_hash = hash(nlp.vocab.vectors.to_bytes())
vectors_hash = hash(nlp.vocab.vectors.to_bytes(exclude=["strings"]))
for sourced_component, sourced_vectors_hash in sourced_vectors_hashes.items():
if vectors_hash != sourced_vectors_hash:
warnings.warn(Warnings.W113.format(name=sourced_component))
Expand Down

0 comments on commit a1b6c1a

Please sign in to comment.