From e8dcdb1382375dedbc237dcf6e13fce3699f29a4 Mon Sep 17 00:00:00 2001 From: dsquareindia Date: Fri, 1 Jul 2016 16:26:04 +0530 Subject: [PATCH] Modified test. Changed wordtopics to word_topics. --- gensim/models/coherencemodel.py | 2 +- gensim/test/test_coherencemodel.py | 38 +++++++++++++++++------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/gensim/models/coherencemodel.py b/gensim/models/coherencemodel.py index a9b3f5c973..2c60240b77 100644 --- a/gensim/models/coherencemodel.py +++ b/gensim/models/coherencemodel.py @@ -141,7 +141,7 @@ def _get_topics(self): bestn = argsort(topic, topn=10, reverse=True) topics.append(bestn) elif isinstance(self.model, LdaMallet): - for topic in self.model.wordtopics: + for topic in self.model.word_topics: bestn = argsort(topic, topn=10, reverse=True) topics.append(bestn) else: diff --git a/gensim/test/test_coherencemodel.py b/gensim/test/test_coherencemodel.py index dba74bc9b4..075ecd56d9 100644 --- a/gensim/test/test_coherencemodel.py +++ b/gensim/test/test_coherencemodel.py @@ -14,8 +14,6 @@ import os.path import tempfile -import numpy as np - from gensim.models.coherencemodel import CoherenceModel from gensim.models.ldamodel import LdaModel from gensim.models.wrappers import LdaMallet @@ -45,29 +43,35 @@ def testfile(): class TestCoherenceModel(unittest.TestCase): def setUp(self): - self.badLdaModel = LdaModel(corpus=corpus, num_topics=2, passes=1, random_state=17) # Bad lda model - self.goodLdaModel = LdaModel(corpus=corpus, num_topics=2, passes=50, random_state=17) # Good lda model + # Suppose given below are the topics which two different LdaModels come up with. + # `topics1` is clearly better as it has a clear distinction between system-human + # interaction and graphs. Hence both the coherence measures for `topics1` should be + # greater. + self.topics1 = [['human', 'computer', 'system', 'interface'], + ['graph', 'minors', 'trees', 'eps']] + self.topics2 = [['user', 'graph', 'minors', 'system'], + ['time', 'graph', 'survey', 'minors']] - def testUMassLdaModel(self): - """Test U_Mass topic coherence algorithm on LDA Model""" - cm1 = CoherenceModel(model=self.badLdaModel, corpus=corpus, dictionary=dictionary, coherence='u_mass') - cm2 = CoherenceModel(model=self.goodLdaModel, corpus=corpus, dictionary=dictionary, coherence='u_mass') - self.assertTrue(cm1.get_coherence() < cm2.get_coherence()) + def testUMass(self): + """Test U_Mass topic coherence algorithm on given topics""" + cm1 = CoherenceModel(topics=self.topics1, corpus=corpus, dictionary=dictionary, coherence='u_mass') + cm2 = CoherenceModel(topics=self.topics2, corpus=corpus, dictionary=dictionary, coherence='u_mass') + self.assertTrue(cm1.get_coherence() > cm2.get_coherence()) - def testCvLdaModel(self): - """Test C_v topic coherence algorithm on LDA Model""" - cm1 = CoherenceModel(model=self.badLdaModel, texts=texts, dictionary=dictionary, coherence='c_v') - cm2 = CoherenceModel(model=self.goodLdaModel, texts=texts, dictionary=dictionary, coherence='c_v') - self.assertTrue(cm1.get_coherence() < cm2.get_coherence()) + def testCv(self): + """Test C_v topic coherence algorithm on given topics""" + cm1 = CoherenceModel(topics=self.topics1, texts=texts, dictionary=dictionary, coherence='c_v') + cm2 = CoherenceModel(topics=self.topics2, texts=texts, dictionary=dictionary, coherence='c_v') + self.assertTrue(cm1.get_coherence() > cm2.get_coherence()) def testErrors(self): """Test if errors are raised on bad input""" # not providing dictionary - self.assertRaises(ValueError, CoherenceModel, model=self.goodLdaModel, corpus=corpus, coherence='u_mass') + self.assertRaises(ValueError, CoherenceModel, topics=self.topics1, corpus=corpus, coherence='u_mass') # not providing texts for c_v and instead providing corpus - self.assertRaises(ValueError, CoherenceModel, model=self.goodLdaModel, corpus=corpus, dictionary=dictionary, coherence='c_v') + self.assertRaises(ValueError, CoherenceModel, topics=self.topics1, corpus=corpus, dictionary=dictionary, coherence='c_v') # not providing corpus or texts for u_mass - self.assertRaises(ValueError, CoherenceModel, self.goodLdaModel, dictionary, 'u_mass') + self.assertRaises(ValueError, CoherenceModel, topics=self.topics1, dictionary=dictionary, coherence='u_mass') if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)