Skip to content

Commit

Permalink
Use HashDictionary to pass tests
Browse files Browse the repository at this point in the history
  • Loading branch information
devashishd12 committed Jun 22, 2016
1 parent f7b9d7b commit 4cbd3ae
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions gensim/test/test_probability_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import unittest

from gensim.topic_coherence import probability_estimation
from gensim.corpora.dictionary import Dictionary
from gensim.corpora.hashdictionary import HashDictionary

class TestProbabilityEstimation(unittest.TestCase):
def setUp(self):
Expand All @@ -22,25 +22,34 @@ def setUp(self):
['user', 'response', 'time'],
['trees'],
['graph', 'trees']]
self.dictionary = Dictionary(self.texts)
self.dictionary = HashDictionary(self.texts)
# Following is the mapping:
# {u'graph': 9, u'eps': 5, u'trees': 8, u'system': 4, u'computer': 1, u'user': 3, u'human': 2, u'time': 7, u'interface': 0, u'response': 6}
# {'computer': 10608,
# 'eps': 31049,
# 'graph': 18451,
# 'human': 31002,
# 'interface': 12466,
# 'response': 5232,
# 'system': 5798,
# 'time': 29104,
# 'trees': 23844,
# 'user': 12736}
self.corpus = [self.dictionary.doc2bow(text) for text in self.texts]
# Suppose the segmented topics from s_one_pre are:
self.segmented_topics = [[(4, 9), (1, 9), (1, 4)], [(1, 9), (3, 9), (3, 1)]]
self.segmented_topics = [[(5798, 18451), (10608, 18451), (10608, 5798)], [(10608, 18451), (12736, 18451), (12736, 10608)]]

def testPBooleanDocument(self):
    """Test p_boolean_document().

    Checks the first element of the pair returned by
    ``probability_estimation.p_boolean_document`` for the fixture corpus
    and segmented topics against a hand-computed mapping keyed by
    HashDictionary token id.
    """
    # Unique topic ids are 5798, 10608, 12736 and 18451
    obtained, _ = probability_estimation.p_boolean_document(self.corpus, self.segmented_topics)
    # Each value is a set of ints -- presumably the indices of the documents
    # that contain the token; confirm against p_boolean_document.
    expected = {
        18451: {5},
        12736: {1, 3},
        5798: {1, 2},
        10608: {0},
    }
    # assertEqual reports the differing entries on failure, which
    # assertTrue(obtained == expected) cannot do.
    self.assertEqual(obtained, expected)

def testPBooleanSlidingWindow(self):
    """Test p_boolean_sliding_window().

    Checks the first element of the pair returned by
    ``probability_estimation.p_boolean_sliding_window`` for the fixture
    texts, segmented topics and dictionary against a hand-computed mapping
    keyed by HashDictionary token id.
    """
    # Test with window size as 2. window_id is zero indexed.
    obtained, _ = probability_estimation.p_boolean_sliding_window(self.texts, self.segmented_topics, self.dictionary, 2)
    # Each value is the set of window ids recorded for that token id.
    expected = {
        10608: {1},
        12736: {8, 2, 3},
        18451: {11},
        5798: {4, 5, 6, 7},
    }
    # assertEqual reports the differing entries on failure, which
    # assertTrue(obtained == expected) cannot do.
    self.assertEqual(obtained, expected)

if __name__ == '__main__':
Expand Down

0 comments on commit 4cbd3ae

Please sign in to comment.