Skip to content

Commit

Permalink
Replace np.multiply with np.square and copyedit in translation_matrix…
Browse files Browse the repository at this point in the history
….py (#3374)

* Replace np.multiply with np.square and copyedit

* Copyedit translation_matrix.py

Co-authored-by: Michael Penkov <m@penkov.dev>
  • Loading branch information
dymil and mpenkov authored Aug 22, 2022
1 parent 7f314ee commit 77c3a7f
Showing 1 changed file with 20 additions and 20 deletions.
40 changes: 20 additions & 20 deletions gensim/models/translation_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
a standard nearest neighbour method or a globally corrected neighbour retrieval method [1]_.
This method can be used to augment the existing phrase tables with more candidate translations, or
filter out errors from the translation tables and known dictionaries [2]_. What's more, It also work
filter out errors from the translation tables and known dictionaries [2]_. What's more, it also works
for any two sets of named-vectors where there are some paired-guideposts to learn the transformation.
Examples
Expand All @@ -14,7 +14,7 @@
How to make translation between two sets of word-vectors
========================================================
Initialize a word-vector models
Initialize two word-vector models
.. sourcecode:: pycon
Expand All @@ -24,7 +24,7 @@
>>> model_en = KeyedVectors.load_word2vec_format(datapath("EN.1-10.cbow1_wind5_hs0_neg10_size300_smpl1e-05.txt"))
>>> model_it = KeyedVectors.load_word2vec_format(datapath("IT.1-10.cbow1_wind5_hs0_neg10_size300_smpl1e-05.txt"))
Define word pairs (that will be used for construction of translation matrix
Define word pairs (that will be used for construction of translation matrix)
.. sourcecode:: pycon
Expand Down Expand Up @@ -143,12 +143,12 @@ def build(cls, lang_vec, lexicon=None):
Object that stores word-vectors
"""
# `words` to store all the word that
# `mat` to store all the word vector for the word in 'words' list
# `words` to store all the words
# `mat` to store the word vector for each word in the 'words' list
words = []
mat = []
if lexicon is not None:
# if the lexicon is not provided, using the all the Keyedvectors's words as default
# if a lexicon is provided, use only its words; otherwise all the KeyedVectors' words are used by default
for item in lexicon:
words.append(item)
mat.append(lang_vec.vectors[lang_vec.get_index(item)])
Expand All @@ -161,18 +161,18 @@ def build(cls, lang_vec, lexicon=None):
return Space(mat, words)

def normalize(self):
"""Normalize the word vector's matrix."""
self.mat = self.mat / np.sqrt(np.sum(np.multiply(self.mat, self.mat), axis=1, keepdims=True))
"""Normalize the word vectors matrix."""
self.mat = self.mat / np.sqrt(np.sum(np.square(self.mat), axis=1, keepdims=True))


class TranslationMatrix(utils.SaveLoad):
"""Objects of this class realize the translation matrix which map the source language to the target language.
"""Objects of this class realize the translation matrix which maps the source language to the target language.
The main methods are:
We map it to the other language space by computing z = Wx, then return the
word whose representation is close to z.
The details use seen the notebook [3]_
For details on use, see the tutorial notebook [3]_
Examples
--------
Expand Down Expand Up @@ -234,7 +234,7 @@ def __init__(self, source_lang_vec, target_lang_vec, word_pairs=None, random_sta
self.train(word_pairs)

def train(self, word_pairs):
"""Build the translation matrix that mapping from source space to target space.
"""Build the translation matrix to map from source space to target space.
Parameters
----------
Expand Down Expand Up @@ -289,7 +289,7 @@ def translate(self, source_words, topn=5, gc=0, sample_num=None, source_lang_vec
Define translation algorithm, if `gc == 0` - use standard NN retrieval,
otherwise, use globally corrected neighbour retrieval method (as described in [1]_).
sample_num : int, optional
Number of word to sample from the source lexicon, if `gc == 1`, then `sample_num` **must** be provided.
Number of words to sample from the source lexicon, if `gc == 1`, then `sample_num` **must** be provided.
source_lang_vec : :class:`~gensim.models.keyedvectors.KeyedVectors`, optional
New source language vectors for translation; by default, the model's source language vectors are used.
target_lang_vec : :class:`~gensim.models.keyedvectors.KeyedVectors`, optional
Expand Down Expand Up @@ -366,15 +366,15 @@ def translate(self, source_words, topn=5, gc=0, sample_num=None, source_lang_vec


class BackMappingTranslationMatrix(utils.SaveLoad):
"""Realize the BackMapping translation matrix which map the source model's document vector
to the target model's document vector(old model).
"""Realize the BackMapping translation matrix which maps the source model's document vector
to the target model's document vector (old model).
BackMapping translation matrix is used to learn a mapping for two document vector space which we
specify as source document vector and target document vector. The target document vector are trained
on superset corpus of source document vector, we can incrementally increase the vector in
BackMapping translation matrix is used to learn a mapping for two document vector spaces which we
specify as source document vector and target document vector. The target document vectors are trained
on a superset corpus of source document vectors; we can incrementally increase the vector in
the old model through the BackMapping translation matrix.
the details use seen the notebook [3]_.
For details on use, see the tutorial notebook [3]_.
Examples
--------
Expand Down Expand Up @@ -421,7 +421,7 @@ def __init__(self, source_lang_vec, target_lang_vec, tagged_docs=None, random_st
self.train(tagged_docs)

def train(self, tagged_docs):
"""Build the translation matrix that mapping from the source model's vector to target model's vector
"""Build the translation matrix to map from the source model's vectors to target model's vectors
Parameters
----------
Expand All @@ -432,7 +432,7 @@ def train(self, tagged_docs):
Returns
-------
numpy.ndarray
Translation matrix that mapping from the source model's vector to target model's vector.
Translation matrix that maps from the source model's vectors to target model's vectors.
"""
m1 = [self.source_lang_vec.dv[item.tags].flatten() for item in tagged_docs]
Expand Down

0 comments on commit 77c3a7f

Please sign in to comment.