From e4f3d956de29aa400661da70e1a0548f94ca9fb5 Mon Sep 17 00:00:00 2001
From: "kojiro.iizuka" <kojiro.iizuka@gunosy.com>
Date: Wed, 29 May 2019 15:06:30 +0900
Subject: [PATCH 01/15] add build_vocab to poincare model

---
 gensim/models/poincare.py    | 133 ++++++++++++++++++++++++++++-------
 gensim/test/test_poincare.py |  15 ++++
 2 files changed, 124 insertions(+), 24 deletions(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index 0c49c761f2..c4180572d5 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -152,6 +152,10 @@ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsil
         """
         self.train_data = train_data
         self.kv = PoincareKeyedVectors(size)
+        self.all_relations = []
+        self.node_relations = defaultdict(set)
+        self._negatives_buffer = NegativesBuffer([])
+        self._negatives_buffer_size = 2000
         self.size = size
         self.train_alpha = alpha  # Learning rate for training
         self.burn_in_alpha = burn_in_alpha  # Learning rate for burn-in
@@ -167,47 +171,48 @@ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsil
         self._np_random = np_random.RandomState(seed)
         self.init_range = init_range
         self._loss_grad = None
-        self._load_relations()
-        self._init_embeddings()
+        self.build_vocab(train_data)
 
-    def _load_relations(self):
+    def build_vocab(self, relations=None, update=False):
         """Load relations from the train data and build vocab."""
-        vocab = {}
-        index2word = []
-        all_relations = []  # List of all relation pairs
-        node_relations = defaultdict(set)  # Mapping from node index to its related node indices
+        old_index2word_len = len(self.kv.index2word)
 
         logger.info("loading relations from train data..")
-        for relation in self.train_data:
+        for relation in relations:
             if len(relation) != 2:
                 raise ValueError('Relation pair "%s" should have exactly two items' % repr(relation))
             for item in relation:
-                if item in vocab:
-                    vocab[item].count += 1
+                if item in self.kv.vocab:
+                    self.kv.vocab[item].count += 1
                 else:
-                    vocab[item] = Vocab(count=1, index=len(index2word))
-                    index2word.append(item)
+                    self.kv.vocab[item] = Vocab(count=1, index=len(self.kv.index2word))
+                    self.kv.index2word.append(item)
             node_1, node_2 = relation
-            node_1_index, node_2_index = vocab[node_1].index, vocab[node_2].index
-            node_relations[node_1_index].add(node_2_index)
+            node_1_index, node_2_index = self.kv.vocab[node_1].index, self.kv.vocab[node_2].index
+            self.node_relations[node_1_index].add(node_2_index)
             relation = (node_1_index, node_2_index)
-            all_relations.append(relation)
-        logger.info("loaded %d relations from train data, %d nodes", len(all_relations), len(vocab))
-        self.kv.vocab = vocab
-        self.kv.index2word = index2word
-        self.indices_set = set(range(len(index2word)))  # Set of all node indices
-        self.indices_array = np.fromiter(range(len(index2word)), dtype=int)  # Numpy array of all node indices
-        self.all_relations = all_relations
-        self.node_relations = node_relations
+            self.all_relations.append(relation)
+        logger.info("loaded %d relations from train data, %d nodes", len(self.all_relations), len(self.kv.vocab))
+        self.indices_set = set(range(len(self.kv.index2word)))  # Set of all node indices
+        self.indices_array = np.fromiter(range(len(self.kv.index2word)), dtype=int)  # Numpy array of all node indices
         self._init_node_probabilities()
-        self._negatives_buffer = NegativesBuffer([])  # Buffer for negative samples, to reduce calls to sampling method
-        self._negatives_buffer_size = 2000
+
+        if not update:
+            self._init_embeddings()
+        else:
+            self._update_embeddings(old_index2word_len)
 
     def _init_embeddings(self):
         """Randomly initialize vectors for the items in the vocab."""
         shape = (len(self.kv.index2word), self.size)
         self.kv.syn0 = self._np_random.uniform(self.init_range[0], self.init_range[1], shape).astype(self.dtype)
 
+    def _update_embeddings(self, old_index2word_len):
+        """Randomly initialize vectors for the items in the additional vocab."""
+        shape = (len(self.kv.index2word) - old_index2word_len, self.size)
+        v = self._np_random.uniform(self.init_range[0], self.init_range[1], shape).astype(self.dtype)
+        self.kv.syn0 = np.concatenate([self.kv.syn0, v])
+
     def _init_node_probabilities(self):
         """Initialize a-priori probabilities."""
         counts = np.fromiter((
@@ -564,6 +569,85 @@ def _update_vectors_batch(self, batch):
         self.kv.syn0[indices_v] -= v_updates
         self.kv.syn0[indices_v] = self._clip_vectors(self.kv.syn0[indices_v], self.epsilon)
 
+    def _build_vocab(self, relations=None, corpus_file=None, update=False, progress_per=10000, keep_raw_vocab=False,
+                    trim_rule=None, **kwargs):
+        """Build vocabulary from a sequence of sentences (can be a once-only generator stream).
+        Each sentence must be a list of unicode strings.
+
+        Parameters
+        ----------
+        relations : iterable of list of str, optional
+            Can be simply a list of lists of tokens, but for larger corpora,
+            consider an iterable that streams the sentences directly from disk/network.
+            See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus`
+            or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples.
+        corpus_file : str, optional
+            Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format.
+            You may use this argument instead of `sentences` to get performance boost. Only one of `sentences` or
+            `corpus_file` arguments need to be passed (not both of them).
+        update : bool
+            If true, the new words in `sentences` will be added to model's vocab.
+        progress_per : int
+            Indicates how many words to process before showing/updating the progress.
+        keep_raw_vocab : bool
+            If not true, delete the raw vocabulary after the scaling is done and free up RAM.
+        trim_rule : function, optional
+            Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary,
+            be trimmed away, or handled using the default (discard if word count < min_count).
+            Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`),
+            or a callable that accepts parameters (word, count, min_count) and returns either
+            :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`.
+            The rule, if given, is only used to prune vocabulary during
+            :meth:`~gensim.models.fasttext.FastText.build_vocab` and is not stored as part of the model.
+
+            The input parameters are of the following types:
+                * `word` (str) - the word we are examining
+                * `count` (int) - the word's frequency count in the corpus
+                * `min_count` (int) - the minimum count threshold.
+
+        **kwargs
+            Additional key word parameters passed to
+            :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.build_vocab`.
+
+        Examples
+        --------
+        Train a model and update vocab for online training:
+
+        .. sourcecode:: pycon
+
+            >>> from gensim.models import FastText
+            >>> relations_1 = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal')]
+            >>> relations_2 = [('gib', 'cat')]
+            >>>
+            >>> model = PoincareModel(relations, negative=2)
+            >>> model.build_vocab(relations_1)
+            >>> model.train(relations_1, epochs=5)
+            >>>
+            >>> model.build_vocab(relations_2, update=True)
+            >>> model.train(relations_2, epochs=5)
+
+        """
+        if not update:
+            self.wv.init_ngrams_weights(self.trainables.seed)
+        elif not len(self.wv.vocab):
+            raise RuntimeError(
+                "You cannot do an online vocabulary-update of a model which has no prior vocabulary. "
+                "First build the vocabulary of your model with a corpus "
+                "by calling the gensim.models.fasttext.FastText.build_vocab method "
+                "before doing an online update."
+            )
+        else:
+            self.vocabulary.old_vocab_len = len(self.wv.vocab)
+
+        retval = super(FastText, self).build_vocab(
+            sentences=sentences, corpus_file=corpus_file, update=update, progress_per=progress_per,
+            keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, **kwargs)
+
+        if update:
+            self.wv.update_ngrams_weights(self.trainables.seed, self.vocabulary.old_vocab_len)
+
+        return retval
+
     def train(self, epochs, batch_size=10, print_every=1000, check_gradients_every=None):
         """Train Poincare embeddings using loaded data and model parameters.
 
@@ -829,6 +913,7 @@ def __init__(self, vector_size):
         super(PoincareKeyedVectors, self).__init__(vector_size)
         self.max_distance = 0
         self.index2word = []
+        self.vocab = {}
 
     @property
     def vectors(self):
diff --git a/gensim/test/test_poincare.py b/gensim/test/test_poincare.py
index c057c81bf0..f2f51a576d 100644
--- a/gensim/test/test_poincare.py
+++ b/gensim/test/test_poincare.py
@@ -93,6 +93,21 @@ def test_persistence_separate_file(self):
         loaded = PoincareModel.load(testfile())
         self.models_equal(model, loaded)
 
+    def test_online_learning(self):
+        """Tests whether additional input data is loaded correctly and completely."""
+        model = PoincareModel(self.data, burn_in=0, negative=3)
+        self.assertEqual(len(model.kv.vocab), 7)
+        self.assertEqual(model.kv.vocab['kangaroo.n.01'].count, 3)
+        self.assertEqual(model.kv.vocab['cat.n.01'].count, 1)
+        model.train(epochs=2)
+        old_vectors = np.copy(model.kv.syn0)
+
+        model.build_vocab([('kangaroo.n.01', 'cat.n.01')], update=True)  # update vocab
+        self.assertEqual(model.kv.vocab['kangaroo.n.01'].count, 4)
+        self.assertEqual(model.kv.vocab['cat.n.01'].count, 2)
+        model.train(epochs=2)
+        self.assertFalse(np.allclose(old_vectors, model.kv.syn0))
+
     def test_train_after_load(self):
         """Tests whether the model can be trained correctly after loading from disk."""
         model = PoincareModel(self.data, burn_in=0, negative=3)

From 8ed8ab3dadced45677707f29d1458cb3f8f5a20d Mon Sep 17 00:00:00 2001
From: "kojiro.iizuka" <kojiro.iizuka@gunosy.com>
Date: Wed, 29 May 2019 15:08:53 +0900
Subject: [PATCH 02/15] delete unused _build_vocab method copied from FastText

---
 gensim/models/poincare.py | 79 ---------------------------------------
 1 file changed, 79 deletions(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index c4180572d5..45905afab4 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -569,85 +569,6 @@ def _update_vectors_batch(self, batch):
         self.kv.syn0[indices_v] -= v_updates
         self.kv.syn0[indices_v] = self._clip_vectors(self.kv.syn0[indices_v], self.epsilon)
 
-    def _build_vocab(self, relations=None, corpus_file=None, update=False, progress_per=10000, keep_raw_vocab=False,
-                    trim_rule=None, **kwargs):
-        """Build vocabulary from a sequence of sentences (can be a once-only generator stream).
-        Each sentence must be a list of unicode strings.
-
-        Parameters
-        ----------
-        relations : iterable of list of str, optional
-            Can be simply a list of lists of tokens, but for larger corpora,
-            consider an iterable that streams the sentences directly from disk/network.
-            See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus`
-            or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples.
-        corpus_file : str, optional
-            Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format.
-            You may use this argument instead of `sentences` to get performance boost. Only one of `sentences` or
-            `corpus_file` arguments need to be passed (not both of them).
-        update : bool
-            If true, the new words in `sentences` will be added to model's vocab.
-        progress_per : int
-            Indicates how many words to process before showing/updating the progress.
-        keep_raw_vocab : bool
-            If not true, delete the raw vocabulary after the scaling is done and free up RAM.
-        trim_rule : function, optional
-            Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary,
-            be trimmed away, or handled using the default (discard if word count < min_count).
-            Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`),
-            or a callable that accepts parameters (word, count, min_count) and returns either
-            :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`.
-            The rule, if given, is only used to prune vocabulary during
-            :meth:`~gensim.models.fasttext.FastText.build_vocab` and is not stored as part of the model.
-
-            The input parameters are of the following types:
-                * `word` (str) - the word we are examining
-                * `count` (int) - the word's frequency count in the corpus
-                * `min_count` (int) - the minimum count threshold.
-
-        **kwargs
-            Additional key word parameters passed to
-            :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.build_vocab`.
-
-        Examples
-        --------
-        Train a model and update vocab for online training:
-
-        .. sourcecode:: pycon
-
-            >>> from gensim.models import FastText
-            >>> relations_1 = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal')]
-            >>> relations_2 = [('gib', 'cat')]
-            >>>
-            >>> model = PoincareModel(relations, negative=2)
-            >>> model.build_vocab(relations_1)
-            >>> model.train(relations_1, epochs=5)
-            >>>
-            >>> model.build_vocab(relations_2, update=True)
-            >>> model.train(relations_2, epochs=5)
-
-        """
-        if not update:
-            self.wv.init_ngrams_weights(self.trainables.seed)
-        elif not len(self.wv.vocab):
-            raise RuntimeError(
-                "You cannot do an online vocabulary-update of a model which has no prior vocabulary. "
-                "First build the vocabulary of your model with a corpus "
-                "by calling the gensim.models.fasttext.FastText.build_vocab method "
-                "before doing an online update."
-            )
-        else:
-            self.vocabulary.old_vocab_len = len(self.wv.vocab)
-
-        retval = super(FastText, self).build_vocab(
-            sentences=sentences, corpus_file=corpus_file, update=update, progress_per=progress_per,
-            keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, **kwargs)
-
-        if update:
-            self.wv.update_ngrams_weights(self.trainables.seed, self.vocabulary.old_vocab_len)
-
-        return retval
-
     def train(self, epochs, batch_size=10, print_every=1000, check_gradients_every=None):
         """Train Poincare embeddings using loaded data and model parameters.
 

From 7bef962cb0ccfb346869da01c31f9e09af8b5bfd Mon Sep 17 00:00:00 2001
From: "kojiro.iizuka" <kojiro.iizuka@gunosy.com>
Date: Wed, 29 May 2019 15:14:22 +0900
Subject: [PATCH 03/15] fix test_online_learning: drop training steps, check only vocab counts

---
 gensim/test/test_poincare.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/gensim/test/test_poincare.py b/gensim/test/test_poincare.py
index f2f51a576d..9ea020da51 100644
--- a/gensim/test/test_poincare.py
+++ b/gensim/test/test_poincare.py
@@ -99,14 +99,9 @@ def test_online_learning(self):
         self.assertEqual(len(model.kv.vocab), 7)
         self.assertEqual(model.kv.vocab['kangaroo.n.01'].count, 3)
         self.assertEqual(model.kv.vocab['cat.n.01'].count, 1)
-        model.train(epochs=2)
-        old_vectors = np.copy(model.kv.syn0)
-
         model.build_vocab([('kangaroo.n.01', 'cat.n.01')], update=True)  # update vocab
         self.assertEqual(model.kv.vocab['kangaroo.n.01'].count, 4)
         self.assertEqual(model.kv.vocab['cat.n.01'].count, 2)
-        model.train(epochs=2)
-        self.assertFalse(np.allclose(old_vectors, model.kv.syn0))
 
     def test_train_after_load(self):
         """Tests whether the model can be trained correctly after loading from disk."""

From 304b6318af088bb7e0cfecaedf5fd47e930b7a0e Mon Sep 17 00:00:00 2001
From: "kojiro.iizuka" <kojiro.iizuka@gunosy.com>
Date: Fri, 21 Jun 2019 17:30:28 +0900
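Subject: [PATCH 04/15] fix TypeError: make `relations` a required argument of build_vocab

Add a build_vocab docstring describing the parameters, with an online-training example.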
---
 gensim/models/poincare.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index 45905afab4..d3f55d942d 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -173,8 +173,34 @@ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsil
         self._loss_grad = None
         self.build_vocab(train_data)
 
-    def build_vocab(self, relations=None, update=False):
-        """Load relations from the train data and build vocab."""
+    def build_vocab(self, relations, update=False):
+        """Build vocabulary from a relations.
+        Each relations must be a tuples of unicode strings.
+
+        Parameters
+        ----------
+        relations : list of tuples
+            List of tuples of positive examples of the form (node_1_index, node_2_index).
+        update : bool
+            If true, the new nodes in `relations` will be added to model's vocab.
+
+        Examples
+        --------
+        Train a model and update vocab for online training:
+
+        .. sourcecode:: pycon
+
+            >>> from gensim.models.poincare import PoincareModel
+            >>> relations_1 = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal')]
+            >>> relations_2 = [('striped_skunk', 'mammal')]
+            >>>
+            >>> model = PoincareModel(relations_1, negative=1)
+            >>> model.train(epochs=50)
+            >>>
+            >>> model.build_vocab(relations_2, update=True)
+            >>> model.train(epochs=50)
+
+        """
         old_index2word_len = len(self.kv.index2word)
 
         logger.info("loading relations from train data..")

From cf312ccc9990003938c15f3fb02abfb13147b6fc Mon Sep 17 00:00:00 2001
From: "kojiro.iizuka" <kojiro.iizuka@gunosy.com>
Date: Fri, 21 Jun 2019 17:45:39 +0900
Subject: [PATCH 05/15] modify description of the `relations` parameter in build_vocab docstring

---
 gensim/models/poincare.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index d3f55d942d..45dd89da2a 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -175,12 +175,14 @@ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsil
 
     def build_vocab(self, relations, update=False):
         """Build vocabulary from a relations.
-        Each relations must be a tuples of unicode strings.
 
         Parameters
         ----------
-        relations : list of tuples
-            List of tuples of positive examples of the form (node_1_index, node_2_index).
+        relations : {iterable of (str, str), :class:`gensim.models.poincare.PoincareRelations`}
+            Iterable of relations, e.g. a list of tuples, or a :class:`gensim.models.poincare.PoincareRelations`
+            instance streaming from a file. Note that the relations are treated as ordered pairs,
+            i.e. a relation (a, b) does not imply the opposite relation (b, a). In case the relations are symmetric,
+            the data should contain both relations (a, b) and (b, a).
         update : bool
             If true, the new nodes in `relations` will be added to model's vocab.
 

From fba43e9d24c2d1a5a836ff173c246ebcd98b344d Mon Sep 17 00:00:00 2001
From: "kojiro.iizuka" <kojiro.iizuka@gunosy.com>
Date: Fri, 21 Jun 2019 18:08:31 +0900
Subject: [PATCH 06/15] add update=False description in build_vocab

---
 gensim/models/poincare.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index c88c41088e..61db094682 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -185,7 +185,8 @@ def build_vocab(self, relations, update=False):
             i.e. a relation (a, b) does not imply the opposite relation (b, a). In case the relations are symmetric,
             the data should contain both relations (a, b) and (b, a).
         update : bool
-            If true, the new nodes in `relations` will be added to model's vocab.
+            If true, new nodes's embeddings are initialized.
+            If false, all node's embeddings are initialized.
 
         Examples
         --------

From eeac67f4ca1a9c05b9d4b601c3350e3efe9287fc Mon Sep 17 00:00:00 2001
From: "kojiro.iizuka" <kojiro.iizuka@gunosy.com>
Date: Fri, 21 Jun 2019 18:11:19 +0900
Subject: [PATCH 07/15] modify summary line of build_vocab docstring

---
 gensim/models/poincare.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index 61db094682..d57fe31cd0 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -175,7 +175,7 @@ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsil
         self.build_vocab(train_data)
 
     def build_vocab(self, relations, update=False):
-        """Build vocabulary from a relations.
+        """Build vocabulary from relations.
 
         Parameters
         ----------

From de0688d52703617eae76697e9846e2b64dcda2c6 Mon Sep 17 00:00:00 2001
From: koiizukag <41324565+koiizukag@users.noreply.github.com>
Date: Wed, 26 Jun 2019 14:19:45 +0900
Subject: [PATCH 08/15] Update gensim/models/poincare.py

Co-Authored-By: Michael Penkov <m@penkov.dev>
---
 gensim/models/poincare.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index d57fe31cd0..da4a6cd170 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -175,7 +175,7 @@ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsil
         self.build_vocab(train_data)
 
     def build_vocab(self, relations, update=False):
-        """Build vocabulary from relations.
+        """Build the model's vocabulary from known relations.
 
         Parameters
         ----------

From e8d7fb9c247597316e2fae79fcb199f07de61997 Mon Sep 17 00:00:00 2001
From: koiizukag <41324565+koiizukag@users.noreply.github.com>
Date: Wed, 26 Jun 2019 14:20:31 +0900
Subject: [PATCH 09/15] Update gensim/models/poincare.py

Co-Authored-By: Michael Penkov <m@penkov.dev>
---
 gensim/models/poincare.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index da4a6cd170..41adf48d38 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -184,7 +184,7 @@ def build_vocab(self, relations, update=False):
             instance streaming from a file. Note that the relations are treated as ordered pairs,
             i.e. a relation (a, b) does not imply the opposite relation (b, a). In case the relations are symmetric,
             the data should contain both relations (a, b) and (b, a).
-        update : bool
+        update : bool, optional
             If true, new nodes's embeddings are initialized.
             If false, all node's embeddings are initialized.
 

From afd0e21189f41e5358aedd6fe335b4bbe889844d Mon Sep 17 00:00:00 2001
From: koiizukag <41324565+koiizukag@users.noreply.github.com>
Date: Wed, 26 Jun 2019 14:20:39 +0900
Subject: [PATCH 10/15] Update gensim/models/poincare.py

Co-Authored-By: Michael Penkov <m@penkov.dev>
---
 gensim/models/poincare.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index 41adf48d38..e85e674cf3 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -185,7 +185,8 @@ def build_vocab(self, relations, update=False):
             i.e. a relation (a, b) does not imply the opposite relation (b, a). In case the relations are symmetric,
             the data should contain both relations (a, b) and (b, a).
         update : bool, optional
-            If true, new nodes's embeddings are initialized.
+            If true, only new nodes's embeddings are initialized.
+            Use this when the model already has an existing vocabulary and you want to update it.
             If false, all node's embeddings are initialized.
 
         Examples

From 666b93cddd818f23f69c0ea9f73052a3a87cbc7c Mon Sep 17 00:00:00 2001
From: koiizukag <41324565+koiizukag@users.noreply.github.com>
Date: Wed, 26 Jun 2019 14:20:59 +0900
Subject: [PATCH 11/15] Update gensim/models/poincare.py

Co-Authored-By: Michael Penkov <m@penkov.dev>
---
 gensim/models/poincare.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index e85e674cf3..1da93ca52e 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -188,6 +188,7 @@ def build_vocab(self, relations, update=False):
             If true, only new nodes's embeddings are initialized.
             Use this when the model already has an existing vocabulary and you want to update it.
             If false, all node's embeddings are initialized.
+            Use this when you're creating a new vocabulary from scratch.
 
         Examples
         --------

From 32198846e5253b9f62b1b18a53a5b519d975a716 Mon Sep 17 00:00:00 2001
From: koiizukag <41324565+koiizukag@users.noreply.github.com>
Date: Wed, 26 Jun 2019 14:22:41 +0900
Subject: [PATCH 12/15] Update gensim/models/poincare.py

Co-Authored-By: Michael Penkov <m@penkov.dev>
---
 gensim/models/poincare.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index 1da93ca52e..0cac6cb1cf 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -200,7 +200,8 @@ def build_vocab(self, relations, update=False):
             >>> relations_1 = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal')]
             >>> relations_2 = [('striped_skunk', 'mammal')]
             >>>
-            >>> model = PoincareModel(relations_1, negative=1)
+            >>> # train a new model from initial data
+            >>> model = PoincareModel(initial_relations, negative=1)
             >>> model.train(epochs=50)
             >>>
             >>> model.build_vocab(relations_2, update=True)

From 02467a25554f79636fae76b92ff737068acc1a0b Mon Sep 17 00:00:00 2001
From: koiizukag <41324565+koiizukag@users.noreply.github.com>
Date: Wed, 26 Jun 2019 14:22:58 +0900
Subject: [PATCH 13/15] Update gensim/models/poincare.py

Co-Authored-By: Michael Penkov <m@penkov.dev>
---
 gensim/models/poincare.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index 0cac6cb1cf..53f6b43d75 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -204,7 +204,9 @@ def build_vocab(self, relations, update=False):
             >>> model = PoincareModel(initial_relations, negative=1)
             >>> model.train(epochs=50)
             >>>
-            >>> model.build_vocab(relations_2, update=True)
+            >>> # online training: update the vocabulary and continue training
+            >>> online_relations = [('striped_skunk', 'mammal')]
+            >>> model.build_vocab(online_relations, update=True)
             >>> model.train(epochs=50)
 
         """

From 38b5b7c964b8421251abe3a64c60f3ef2df5a547 Mon Sep 17 00:00:00 2001
From: koiizukag <41324565+koiizukag@users.noreply.github.com>
Date: Wed, 26 Jun 2019 14:23:21 +0900
Subject: [PATCH 14/15] Update gensim/models/poincare.py

Co-Authored-By: Michael Penkov <m@penkov.dev>
---
 gensim/models/poincare.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index 53f6b43d75..a39bd17932 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -197,7 +197,7 @@ def build_vocab(self, relations, update=False):
         .. sourcecode:: pycon
 
             >>> from gensim.models.poincare import PoincareModel
-            >>> relations_1 = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal')]
+            >>> initial_relations = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal')]
             >>> relations_2 = [('striped_skunk', 'mammal')]
             >>>
             >>> # train a new model from initial data

From 28007e68b045cd824035c725b371ae5dc759cb6e Mon Sep 17 00:00:00 2001
From: Michael Penkov <m@penkov.dev>
Date: Wed, 26 Jun 2019 15:34:37 +0900
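Subject: [PATCH 15/15] Update poincare.py

Tidy the build_vocab docstring example: drop the unused `relations_2` line and
define `initial_relations` directly under the comment that introduces it.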
---
 gensim/models/poincare.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py
index a39bd17932..42a3a60d48 100644
--- a/gensim/models/poincare.py
+++ b/gensim/models/poincare.py
@@ -197,10 +197,9 @@ def build_vocab(self, relations, update=False):
         .. sourcecode:: pycon
 
             >>> from gensim.models.poincare import PoincareModel
-            >>> initial_relations = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal')]
-            >>> relations_2 = [('striped_skunk', 'mammal')]
             >>>
             >>> # train a new model from initial data
+            >>> initial_relations = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal')]
             >>> model = PoincareModel(initial_relations, negative=1)
             >>> model.train(epochs=50)
             >>>