diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ad0f5d379..e8f080b11d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ Changes ======= +0.13.2 + +* topics, topn parameters changed to num_topics and num_words in show_topics() and print_topics() (@droudy, #747) + - In hdpmodel and dtmmodel + - NOT BACKWARDS COMPATIBLE! 0.13.1, 2016-06-22 diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py index 0103ce9148..25c307f68e 100755 --- a/gensim/models/hdpmodel.py +++ b/gensim/models/hdpmodel.py @@ -134,7 +134,7 @@ class HdpModel(interfaces.TransformationABC): on a training corpus: >>> hdp = HdpModel(corpus, id2word) - >>> hdp.print_topics(topics=20, topn=10) + >>> hdp.print_topics(num_topics=20, num_words=10) Inference on new documents is based on the approximately LDA-equivalent topics. @@ -456,15 +456,15 @@ def update_expectations(self): self.m_timestamp[:] = self.m_updatect self.m_status_up_to_date = True - def print_topics(self, topics=20, topn=20): - """Alias for `show_topics()` that prints the `topn` most + def print_topics(self, num_topics=20, num_words=20): + """Alias for `show_topics()` that prints the `num_words` most probable words for `topics` number of topics to log. Set `topics=-1` to print all topics.""" - return self.show_topics(topics=topics, topn=topn, log=True) + return self.show_topics(num_topics=num_topics, num_words=num_words, log=True) - def show_topics(self, topics=20, topn=20, log=False, formatted=True): + def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True): """ - Print the `topN` most probable words for `topics` number of topics. + Print the `num_words` most probable words for `num_topics` number of topics. Set `topics=-1` to print all topics. 
Set `formatted=True` to return the topics as a list of strings, or @@ -475,7 +475,7 @@ def show_topics(self, topics=20, topn=20, log=False, formatted=True): self.update_expectations() betas = self.m_lambda + self.m_eta hdp_formatter = HdpTopicFormatter(self.id2word, betas) - return hdp_formatter.show_topics(topics, topn, log, formatted) + return hdp_formatter.show_topics(num_topics, num_words, log, formatted) def save_topics(self, doc_count=None): """legacy method; use `self.save()` instead""" @@ -578,24 +578,24 @@ def __init__(self, dictionary=None, topic_data=None, topic_file=None, style=None self.style = style - def print_topics(self, topics=10, topn=10): - return self.show_topics(topics, topn, True) + def print_topics(self, num_topics=10, num_words=10): + return self.show_topics(num_topics, num_words, True) - def show_topics(self, topics=10, topn=10, log=False, formatted=True): + def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): shown = [] - if topics < 0: - topics = len(self.data) + if num_topics < 0: + num_topics = len(self.data) - topics = min(topics, len(self.data)) + num_topics = min(num_topics, len(self.data)) - for k in xrange(topics): + for k in xrange(num_topics): lambdak = list(self.data[k, :]) lambdak = lambdak / sum(lambdak) temp = zip(lambdak, xrange(len(lambdak))) temp = sorted(temp, key=lambda x: x[0], reverse=True) - topic_terms = self.show_topic_terms(temp, topn) + topic_terms = self.show_topic_terms(temp, num_words) if formatted: topic = self.format_topic(k, topic_terms) @@ -609,8 +609,8 @@ def show_topics(self, topics=10, topn=10, log=False, formatted=True): return shown - def show_topic_terms(self, topic_data, topn): - return [(self.dictionary[wid], weight) for (weight, wid) in topic_data[:topn]] + def show_topic_terms(self, topic_data, num_words): + return [(self.dictionary[wid], weight) for (weight, wid) in topic_data[:num_words]] def format_topic(self, topic_id, topic_terms): if self.STYLE_GENSIM == 
self.style: diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py index 40694c4517..a55a605391 100644 --- a/gensim/models/wrappers/dtmmodel.py +++ b/gensim/models/wrappers/dtmmodel.py @@ -235,23 +235,23 @@ def train(self, corpus, time_slices, mode, model): # influence[2,5] influence of document 2 on topic 5 self.influences_time.append(influence) - def print_topics(self, topics=10, times=5, topn=10): - return self.show_topics(topics, times, topn, log=True) + def print_topics(self, num_topics=10, times=5, num_words=10): + return self.show_topics(num_topics, times, num_words, log=True) - def show_topics(self, topics=10, times=5, topn=10, log=False, formatted=True): + def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted=True): """ - Print the `topn` most probable words for `topics` number of topics at 'times' time slices. + Print the `num_words` most probable words for `num_topics` number of topics at 'times' time slices. Set `topics=-1` to print all topics. Set `formatted=True` to return the topics as a list of strings, or `False` as lists of (weight, word) pairs. 
""" - if topics < 0 or topics >= self.num_topics: - topics = self.num_topics - chosen_topics = range(topics) + if num_topics < 0 or num_topics >= self.num_topics: + num_topics = self.num_topics + chosen_topics = range(num_topics) else: - topics = min(topics, self.num_topics) - chosen_topics = range(topics) + num_topics = min(num_topics, self.num_topics) + chosen_topics = range(num_topics) # add a little random jitter, to randomize results around the same # alpha # sort_alpha = self.alpha + 0.0001 * \ diff --git a/gensim/test/test_dtm.py b/gensim/test/test_dtm.py index a4e1417719..36d893cf17 100644 --- a/gensim/test/test_dtm.py +++ b/gensim/test/test_dtm.py @@ -40,7 +40,7 @@ def testDtm(self): self.dtm_path, self.corpus, self.time_slices, num_topics=2, id2word=self.id2word, model='dtm', initialize_lda=True, rng_seed=1) - topics = model.show_topics(topics=2, times=2, topn=10) + topics = model.show_topics(num_topics=2, times=2, num_words=10) self.assertEqual(len(topics), 4) one_topic = model.show_topic(topicid=1, time=1, topn=10) @@ -53,7 +53,7 @@ def testDim(self): self.dtm_path, self.corpus, self.time_slices, num_topics=2, id2word=self.id2word, model='fixed', initialize_lda=True, rng_seed=1) - topics = model.show_topics(topics=2, times=2, topn=10) + topics = model.show_topics(num_topics=2, times=2, num_words=10) self.assertEqual(len(topics), 4) one_topic = model.show_topic(topicid=1, time=1, topn=10) diff --git a/gensim/test/test_hdpmodel.py b/gensim/test/test_hdpmodel.py index 9f6d06aaf3..0d140d3316 100644 --- a/gensim/test/test_hdpmodel.py +++ b/gensim/test/test_hdpmodel.py @@ -55,7 +55,7 @@ def setUp(self): self.model = self.class_(corpus, id2word=dictionary) def testShowTopics(self): - topics = self.model.show_topics(formatted=False) + topics = self.model.show_topics(formatted=False, num_topics=20, num_words=20) for topic_no, topic in topics: self.assertTrue(isinstance(topic_no, int)) diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index 
c1bac44605..94a152610a 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -48,7 +48,6 @@ def testfile(): return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') - class TestLdaModel(unittest.TestCase): def setUp(self): self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))