Skip to content

Commit

Permalink
Fix issue with zeroshot topic modeling missing outlier (#1957)
Browse files: browse the repository at this point in the history
  • Loading branch information
MaartenGr authored May 7, 2024
1 parent 6f91b4e commit 1aa73b3
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions bertopic/_bertopic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3679,15 +3679,20 @@ def _combine_zeroshot_topics(self,

  cluster_indices = list(documents.Old_ID.values)
  cluster_names = list(merged_model.topic_labels_.values())[len(set(y)):]
- cluster_topics = [cluster_names[topic + self._outliers] for topic in documents.Topic.values]
+ if self._outliers:
+     cluster_topics = [cluster_names[topic] if topic != -1 else "Outliers" for topic in documents.Topic.values]
+ else:
+     cluster_topics = [cluster_names[topic] for topic in documents.Topic.values]

  df = pd.DataFrame({
      "Indices": zeroshot_indices + cluster_indices,
      "Label": zeroshot_topics + cluster_topics}
  ).sort_values("Indices")
  reverse_topic_labels = dict((v, k) for k, v in merged_model.topic_labels_.items())
+ if self._outliers:
+     reverse_topic_labels["Outliers"] = -1
  df.Label = df.Label.map(reverse_topic_labels)
- merged_model.topics_ = df.Label.values
+ merged_model.topics_ = df.Label.astype(int).tolist()

  # Update the class internally
  has_outliers = bool(self._outliers)
Expand Down

0 comments on commit 1aa73b3

Please sign in to comment.