From 30eb1a3aa37624755e40299e73fb86ed185296c1 Mon Sep 17 00:00:00 2001 From: Santhosh18 Date: Wed, 2 Oct 2019 16:35:50 +0530 Subject: [PATCH 1/2] Fix for #2574 --- gensim/corpora/dictionary.py | 1 + gensim/test/test_corpora_dictionary.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/gensim/corpora/dictionary.py b/gensim/corpora/dictionary.py index 229d28d2c9..2ba296b115 100644 --- a/gensim/corpora/dictionary.py +++ b/gensim/corpora/dictionary.py @@ -472,6 +472,7 @@ def compactify(self): self.token2id = {token: idmap[tokenid] for token, tokenid in iteritems(self.token2id)} self.id2token = {} self.dfs = {idmap[tokenid]: freq for tokenid, freq in iteritems(self.dfs)} + self.cfs = {idmap[tokenid]: freq for tokenid, freq in iteritems(self.cfs)} def save_as_text(self, fname, sort_by_word=True): """Save :class:`~gensim.corpora.dictionary.Dictionary` to a text file. diff --git a/gensim/test/test_corpora_dictionary.py b/gensim/test/test_corpora_dictionary.py index cca9993952..918c18c9fd 100644 --- a/gensim/test/test_corpora_dictionary.py +++ b/gensim/test/test_corpora_dictionary.py @@ -124,8 +124,10 @@ def testMerge(self): def testFilter(self): d = Dictionary(self.texts) d.filter_extremes(no_below=2, no_above=1.0, keep_n=4) - expected = {0: 3, 1: 3, 2: 3, 3: 3} - self.assertEqual(d.dfs, expected) + dfs_expected = {0: 3, 1: 3, 2: 3, 3: 3} + cfs_expected = {0: 4, 1: 3, 2: 3, 3: 3} + self.assertEqual(d.dfs, dfs_expected) + self.assertEqual(d.dfs, cfs_expected) def testFilterKeepTokens_keepTokens(self): # provide keep_tokens argument, keep the tokens given From 95d243d3bf2d78b7f0a5b3868c9637647080f380 Mon Sep 17 00:00:00 2001 From: Santhosh18 Date: Wed, 2 Oct 2019 16:38:48 +0530 Subject: [PATCH 2/2] Fix for #2574 --- gensim/test/test_corpora_dictionary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/test/test_corpora_dictionary.py b/gensim/test/test_corpora_dictionary.py index 918c18c9fd..29338ad45e 100644 --- a/gensim/test/test_corpora_dictionary.py +++ b/gensim/test/test_corpora_dictionary.py @@ -127,7 +127,7 @@ def testFilter(self): dfs_expected = {0: 3, 1: 3, 2: 3, 3: 3} cfs_expected = {0: 4, 1: 3, 2: 3, 3: 3} self.assertEqual(d.dfs, dfs_expected) - self.assertEqual(d.dfs, cfs_expected) + self.assertEqual(d.cfs, cfs_expected) def testFilterKeepTokens_keepTokens(self): # provide keep_tokens argument, keep the tokens given