Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parameter name change in HdpModel and dtmmodel to be consistent with LdaModel #755

Merged
merged 13 commits into from
Jun 24, 2016
Merged
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
Changes
=======
0.13.2

* topics, topn parameters changed to num_topics and num_words in show_topics() and print_topics() (@droudy, #747)
- In hdpmodel and dtmmodel
- NOT BACKWARDS COMPATIBLE!

0.13.1, 2016-06-22

Expand Down
34 changes: 17 additions & 17 deletions gensim/models/hdpmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class HdpModel(interfaces.TransformationABC):
on a training corpus:

>>> hdp = HdpModel(corpus, id2word)
>>> hdp.print_topics(topics=20, topn=10)
>>> hdp.print_topics(num_topics=20, num_words=10)

Inference on new documents is based on the approximately LDA-equivalent topics.

Expand Down Expand Up @@ -456,15 +456,15 @@ def update_expectations(self):
self.m_timestamp[:] = self.m_updatect
self.m_status_up_to_date = True

def print_topics(self, topics=20, topn=20):
"""Alias for `show_topics()` that prints the `topn` most
def print_topics(self, num_topics=20, num_words=20):
"""Alias for `show_topics()` that prints the `num_words` most
probable words for `num_topics` number of topics to log.
Set `num_topics=-1` to print all topics."""
return self.show_topics(topics=topics, topn=topn, log=True)
return self.show_topics(num_topics=num_topics, num_words=num_words, log=True)

def show_topics(self, topics=20, topn=20, log=False, formatted=True):
def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True):
"""
Print the `topN` most probable words for `topics` number of topics.
Print the `num_words` most probable words for `num_topics` number of topics.
Set `num_topics=-1` to print all topics.

Set `formatted=True` to return the topics as a list of strings, or
Expand All @@ -475,7 +475,7 @@ def show_topics(self, topics=20, topn=20, log=False, formatted=True):
self.update_expectations()
betas = self.m_lambda + self.m_eta
hdp_formatter = HdpTopicFormatter(self.id2word, betas)
return hdp_formatter.show_topics(topics, topn, log, formatted)
return hdp_formatter.show_topics(num_topics, num_words, log, formatted)

def save_topics(self, doc_count=None):
"""legacy method; use `self.save()` instead"""
Expand Down Expand Up @@ -578,24 +578,24 @@ def __init__(self, dictionary=None, topic_data=None, topic_file=None, style=None

self.style = style

def print_topics(self, topics=10, topn=10):
return self.show_topics(topics, topn, True)
def print_topics(self, num_topics=10, num_words=10):
return self.show_topics(num_topics, num_words, True)

def show_topics(self, topics=10, topn=10, log=False, formatted=True):
def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
shown = []
if topics < 0:
topics = len(self.data)
if num_topics < 0:
num_topics = len(self.data)

topics = min(topics, len(self.data))
num_topics = min(num_topics, len(self.data))

for k in xrange(topics):
for k in xrange(num_topics):
lambdak = list(self.data[k, :])
lambdak = lambdak / sum(lambdak)

temp = zip(lambdak, xrange(len(lambdak)))
temp = sorted(temp, key=lambda x: x[0], reverse=True)

topic_terms = self.show_topic_terms(temp, topn)
topic_terms = self.show_topic_terms(temp, num_words)

if formatted:
topic = self.format_topic(k, topic_terms)
Expand All @@ -609,8 +609,8 @@ def show_topics(self, topics=10, topn=10, log=False, formatted=True):

return shown

def show_topic_terms(self, topic_data, topn):
return [(self.dictionary[wid], weight) for (weight, wid) in topic_data[:topn]]
def show_topic_terms(self, topic_data, num_words):
return [(self.dictionary[wid], weight) for (weight, wid) in topic_data[:num_words]]

def format_topic(self, topic_id, topic_terms):
if self.STYLE_GENSIM == self.style:
Expand Down
18 changes: 9 additions & 9 deletions gensim/models/wrappers/dtmmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,23 +235,23 @@ def train(self, corpus, time_slices, mode, model):
# influence[2,5] influence of document 2 on topic 5
self.influences_time.append(influence)

def print_topics(self, topics=10, times=5, topn=10):
return self.show_topics(topics, times, topn, log=True)
def print_topics(self, num_topics=10, times=5, num_words=10):
return self.show_topics(num_topics, times, num_words, log=True)

def show_topics(self, topics=10, times=5, topn=10, log=False, formatted=True):
def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted=True):
"""
Print the `topn` most probable words for `topics` number of topics at 'times' time slices.
Print the `num_words` most probable words for `num_topics` number of topics at 'times' time slices.
Set `num_topics=-1` to print all topics.

Set `formatted=True` to return the topics as a list of strings, or `False` as lists of (weight, word) pairs.

"""
if topics < 0 or topics >= self.num_topics:
topics = self.num_topics
chosen_topics = range(topics)
if num_topics < 0 or num_topics >= self.num_topics:
num_topics = self.num_topics
chosen_topics = range(num_topics)
else:
topics = min(topics, self.num_topics)
chosen_topics = range(topics)
num_topics = min(num_topics, self.num_topics)
chosen_topics = range(num_topics)
# add a little random jitter, to randomize results around the same
# alpha
# sort_alpha = self.alpha + 0.0001 * \
Expand Down
4 changes: 2 additions & 2 deletions gensim/test/test_dtm.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def testDtm(self):
self.dtm_path, self.corpus, self.time_slices, num_topics=2,
id2word=self.id2word, model='dtm', initialize_lda=True,
rng_seed=1)
topics = model.show_topics(topics=2, times=2, topn=10)
topics = model.show_topics(num_topics=2, times=2, num_words=10)
self.assertEqual(len(topics), 4)

one_topic = model.show_topic(topicid=1, time=1, topn=10)
Expand All @@ -53,7 +53,7 @@ def testDim(self):
self.dtm_path, self.corpus, self.time_slices, num_topics=2,
id2word=self.id2word, model='fixed', initialize_lda=True,
rng_seed=1)
topics = model.show_topics(topics=2, times=2, topn=10)
topics = model.show_topics(num_topics=2, times=2, num_words=10)
self.assertEqual(len(topics), 4)

one_topic = model.show_topic(topicid=1, time=1, topn=10)
Expand Down
2 changes: 1 addition & 1 deletion gensim/test/test_hdpmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def setUp(self):
self.model = self.class_(corpus, id2word=dictionary)

def testShowTopics(self):
topics = self.model.show_topics(formatted=False)
topics = self.model.show_topics(formatted=False, num_topics=20, num_words=20)

for topic_no, topic in topics:
self.assertTrue(isinstance(topic_no, int))
Expand Down
1 change: 0 additions & 1 deletion gensim/test/test_ldamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ def testfile():
return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')



class TestLdaModel(unittest.TestCase):
def setUp(self):
self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
Expand Down