Skip to content

Commit

Permalink
Merge pull request #8 from Sydney-Informatics-Hub/fix/wordcloud_stopwords
Browse files Browse the repository at this point in the history

Fix/wordcloud stopwords
  • Loading branch information
h-croser authored Dec 18, 2024
2 parents da1b2ce + edd4147 commit cd70a61
Show file tree
Hide file tree
Showing 6 changed files with 2,609 additions and 2,334 deletions.
18 changes: 9 additions & 9 deletions juxtorpus/viz/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ def _wordcloud(corpus, max_words: int, metric: str, dtm_name: str, stopwords: li
wc = WordCloud(background_color='white', max_words=max_words, height=600, width=1200, stopwords=stopwords)

dtm = corpus.dtms[dtm_name]
with dtm.without_terms(stopwords) as dtm:
fl = dtm.freq_table()
if metric == 'tf':
df = pd.DataFrame({'Count':fl, 'Freq':1_000*fl/dtm.total})
elif metric == 'tfidf':
tfidf_mat = bow_tfidf(dtm.matrix)
df = pd.DataFrame({'Count':fl, 'Freq':sum(tfidf_mat.toarray())})
else:
raise ValueError(f"Metric {metric} is not supported. Must be one of {', '.join(metrics)}")
fl = dtm.freq_table()
if metric == 'tf':
df = pd.DataFrame({'Count':fl, 'Freq':1_000*fl/dtm.total})
elif metric == 'tfidf':
tfidf_mat = bow_tfidf(dtm.matrix)
df = pd.DataFrame({'Count':fl, 'Freq':sum(tfidf_mat.toarray())})
else:
raise ValueError(f"Metric {metric} is not supported. Must be one of {', '.join(metrics)}")
df = df[~df.index.isin(stopwords)]
df = df.sort_values('Freq', ascending=False)
counter = df.Freq.to_dict()
wc.generate_from_frequencies(counter)
Expand Down
Loading

0 comments on commit cd70a61

Please sign in to comment.