You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
GraphLab is now Turi Create (turicreate), so I modified the functions a bit to make them work:
from __future__ importabsolute_importimportfuncyasfpimportnumpyasnpimportpandasaspdimportturicreateastcimportpyLDAvisdef_topics_as_df(topic_model):
tdf=topic_model.topics.to_dataframe()
returnpd.DataFrame(np.vstack(tdf['topic_probabilities'].values), index=tdf['vocabulary'])
def _sum_sarray_dicts(sarray):
    """Add up an SArray of count dictionaries into a single dictionary.

    Each dictionary is stacked into key/value columns (referenced here as
    ``X1`` and ``X2``), the values are summed per key, and the key/total
    pairs are folded back into one dictionary with ``unstack``.
    """
    stacked = tc.SFrame({'count_dicts': sarray}).stack('count_dicts')
    totals = stacked.groupby(
        key_column_names='X1',
        operations={'count': tc.aggregate.SUM('X2')})
    # The unstacked frame's first row is a dict of column -> value; its first
    # value is the merged dictionary (presumably the only column -- verify).
    merged_row = totals.unstack(column_names=['X1', 'count'])[0]
    return list(merged_row.values())[0]
def _extract_doc_data(docs):
    """Collect the corpus-level inputs that pyLDAvis needs.

    Parameters
    ----------
    docs : SArray of dicts
        The corpus in bag-of-words form (token -> count per document).

    Returns
    -------
    dict
        ``doc_lengths``: list with the summed counts of each document;
        ``vocab``: list of the distinct tokens in the corpus;
        ``term_frequency``: list of corpus-wide totals, in the same order
        as ``vocab``.
    """
    # Use the builtin ``sum``. On Python 3 ``d.values()`` is a view, and
    # ``np.array(view)`` becomes a 0-d object array whose ``.sum()`` is not a
    # numeric total, so every "length" ended up being a per-document
    # collection and pyLDAvis failed when broadcasting against it.
    doc_lengths = list(docs.apply(lambda d: sum(d.values())))
    term_freqs_dict = _sum_sarray_dicts(docs)
    # Materialise the views: keys() and values() of one dict iterate in the
    # same order, so the two lists stay aligned, and callers get real
    # sequences they can index with or hand to pandas.
    vocab = list(term_freqs_dict.keys())
    term_freqs = list(term_freqs_dict.values())
    return {
        'doc_lengths': doc_lengths,
        'vocab': vocab,
        'term_frequency': term_freqs,
    }
def _extract_model_data(topic_model, docs, vocab):
    """Build the row-normalised distributions of the trained model.

    Returns a dict with ``topic_term_dists`` (the topic table restricted to
    the columns named in ``vocab``) and ``doc_topic_dists`` (the model's
    predicted probabilities for ``docs``). Every row of both arrays is
    divided by its own sum. The shapes of both arrays are printed.
    """
    doc_probs = np.vstack(
        topic_model.predict(docs, output_type='probabilities'))
    doc_probs = doc_probs / doc_probs.sum(axis=1, keepdims=True)

    # Transpose so vocabulary entries become columns, then select them in
    # the order given by ``vocab``.
    term_probs = _topics_as_df(topic_model).T[vocab].values
    term_probs = term_probs / term_probs.sum(axis=1, keepdims=True)

    print(doc_probs.shape)
    print(term_probs.shape)
    return {'topic_term_dists': term_probs, 'doc_topic_dists': doc_probs}
def _extract_data(topic_model, docs):
    """Gather corpus data and model data into one dictionary.

    The vocabulary obtained from the corpus is passed on to the model
    extraction so both parts refer to the same terms.
    """
    corpus_fields = _extract_doc_data(docs)
    model_fields = _extract_model_data(
        topic_model, docs, corpus_fields['vocab'])
    return fp.merge(corpus_fields, model_fields)
def prepare(topic_model, docs, **kargs):
    """Transform a trained topic model and its corpus for visualization.

    Extracts the data structures pyLDAvis needs from the model and the
    corpus and passes them to :func:`pyLDAvis.prepare`.

    Parameters
    ----------
    topic_model : TopicModel
        An already trained topic model (originally documented as
        ``graphlab.toolkits.topic_model.topic_model.TopicModel``).
    docs : SArray of dicts
        The corpus in bag of word form, the same docs used to train the
        model.
    **kargs :
        Additional keyword arguments, merged with the extracted data and
        passed through to :func:`pyLDAvis.prepare`.

    Returns
    -------
    prepared_data : PreparedData
        The data structures used in the visualization.

    Example
    --------
    For example usage please see this notebook:
    http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/GraphLab.ipynb
    """
    extracted = _extract_data(topic_model, docs)
    return pyLDAvis.prepare(**fp.merge(extracted, kargs))
# Build the visualization data. `model` and `docs` are not defined in the
# visible code and must already exist (presumably a trained topic model and
# the bag-of-words corpus it was trained on -- confirm against the caller).
prepare(model,docs)
But running the above code gives me the following error:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-098d23e5b0bf> in <module>
     73     return pyLDAvis.prepare(**opts)
     74
---> 75 prepare(model,docs)
<ipython-input-11-098d23e5b0bf>inprepare(topic_model, docs, **kargs)
71 """
72 opts = fp.merge(_extract_data(topic_model, docs), kargs)
---> 73 return pyLDAvis.prepare(**opts)
74
75 prepare(model,docs)
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pyLDAvis/_prepare.py in prepare(topic_term_dists, doc_topic_dists, doc_lengths, vocab, term_frequency, R, lambda_step, mds, n_jobs, plot_opts, sort_topics)
377 print(doc_topic_dists.shape)
378 print(doc_lengths.shape)
--> 379 topic_freq = (doc_topic_dists.T * doc_lengths).T.sum()
380 # topic_freq = np.dot(doc_topic_dists.T, doc_lengths)
381 if (sort_topics):
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/ops/__init__.py in f(self, other, axis, level, fill_value)
773 pass_op = op if axis in [0, "columns", None] else na_op
774 pass_op = pass_op if not is_logical else op
--> 775 return _combine_series_frame(
776 self, other, pass_op, fill_value=fill_value, axis=axis, level=level
777 )
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/ops/__init__.py in _combine_series_frame(self, other, func, fill_value, axis, level)
618 new_data = left._combine_match_index(right, func)
619 else:
--> 620 new_data = dispatch_to_series(left, right, func, axis="columns")
621
622 return left._construct_result(new_data)
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/ops/__init__.py in dispatch_to_series(left, right, func, str_rep, axis)
417 raise NotImplementedError(right)
418
--> 419 new_data = expressions.evaluate(column_op, str_rep, left, right)
420 return new_data
421
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/computation/expressions.py in evaluate(op, op_str, a, b, use_numexpr)
206 use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b)
209 return _evaluate_standard(op, op_str, a, b)
210
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/computation/expressions.py in _evaluate_numexpr(op, op_str, a, b)
119
120 if result is None:
--> 121 result = _evaluate_standard(op, op_str, a, b)
122
123 return result
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/computation/expressions.py in _evaluate_standard(op, op_str, a, b)
68 _store_test_result(False)
69 with np.errstate(all="ignore"):
---> 70 return op(a, b)
71
72
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/ops/__init__.py in column_op(a, b)
405
406 def column_op(a, b):
--> 407 return {i: func(a.iloc[:, i], b.iloc[i]) for i in range(len(a.columns))}
408
409 elif isinstance(right, ABCSeries):
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/ops/__init__.py in <dictcomp>(.0)
405
406 def column_op(a, b):
--> 407 return {i: func(a.iloc[:, i], b.iloc[i]) for i in range(len(a.columns))}
408
409 elif isinstance(right, ABCSeries):
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/ops/common.py in new_method(self, other)
62 other = item_from_zerodim(other)
63
---> 64 return method(self, other)
65
66 return new_method
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/ops/__init__.py in wrapper(left, right)
501 lvalues = extract_array(left, extract_numpy=True)
502 rvalues = extract_array(right, extract_numpy=True)
--> 503 result = arithmetic_op(lvalues, rvalues, op, str_rep)
504
505 return _construct_result(left, result, index=left.index, name=res_name)
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/ops/array_ops.py in arithmetic_op(left, right, op, str_rep)
195 else:
196 with np.errstate(all="ignore"):
--> 197 res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep)
198
199 return res_values
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/ops/array_ops.py in na_arithmetic_op(left, right, op, str_rep)
147
148 try:
--> 149 result = expressions.evaluate(op, str_rep, left, right)
150 except TypeError:
151 result = masked_arith_op(left, right, op)
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/computation/expressions.py in evaluate(op, op_str, a, b, use_numexpr)
206 use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b)
209 return _evaluate_standard(op, op_str, a, b)
210
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/computation/expressions.py in _evaluate_numexpr(op, op_str, a, b)
119
120 if result is None:
--> 121 result = _evaluate_standard(op, op_str, a, b)
122
123 return result
~/anaconda3/envs/namoenv/lib/python3.8/site-packages/pandas/core/computation/expressions.py in _evaluate_standard(op, op_str, a, b)
68 _store_test_result(False)
69 with np.errstate(all="ignore"):
---> 70 return op(a, b)
71
72
ValueError: operands could not be broadcast together with shapes (30,) (39,)
Printing the shapes of the matrices about to be multiplied gives:
(1000, 30)
(1000,)
Everything seems fine, so can anyone help me find the exact issue?
The text was updated successfully, but these errors were encountered:
Graphlab is now turicreate, so I modified the functions a bit to make it work
But running the above code gives me the error
On printing the shape of the matrices about to be multiplied
Everything seems fine, can anyone help find me the exact issue?
The text was updated successfully, but these errors were encountered: