Update scikit-learn to 1.1.0 and replace the deprecated `get_feature_names()` with `get_feature_names_out()`

MIND-Lab · Aug 15, 2023 · 92e04c9
1 parent 4c0698f
commit 92e04c9
Show file tree

Hide file tree

Showing 4 changed files with 4 additions and 4 deletions.
diff --git a/octis/models/NMF_scikit.py b/octis/models/NMF_scikit.py
@@ -118,7 +118,7 @@ def train_model(self, dataset, hyperparameters=None, top_words=10):
             X = vectorizer.fit_transform(real_corpus)
 
             self.id2word = {i: k for i, k in enumerate(
-                vectorizer.get_feature_names())}
+                vectorizer.get_feature_names_out())}
             if self.use_partitions:
                 test_corpus = []
                 for document in partition[1]:

diff --git a/octis/models/contextualized_topic_models/utils/preprocessing.py b/octis/models/contextualized_topic_models/utils/preprocessing.py
@@ -34,7 +34,7 @@ def preprocess(self):
 
         vectorizer = CountVectorizer(max_features=self.vocabulary_size, token_pattern=r'\b[a-zA-Z]{2,}\b')
         vectorizer.fit_transform(preprocessed_docs_tmp)
-        vocabulary = set(vectorizer.get_feature_names())
+        vocabulary = set(vectorizer.get_feature_names_out())
         preprocessed_docs_tmp = [' '.join([w for w in doc.split() if w in vocabulary])
                                  for doc in preprocessed_docs_tmp]
 

diff --git a/octis/preprocessing/preprocessing.py b/octis/preprocessing/preprocessing.py
@@ -290,7 +290,7 @@ def filter_words(self, docs):
                                          stop_words=self.stopwords)
 
         vectorizer.fit_transform(docs)
-        vocabulary = vectorizer.get_feature_names()
+        vocabulary = vectorizer.get_feature_names_out()
         return vocabulary
 
     '''

diff --git a/requirements.txt b/requirements.txt
@@ -2,7 +2,7 @@ gensim==4.2.0
 nltk
 pandas
 spacy
-scikit-learn==0.24.2
+scikit-learn==1.1.0
 scikit-optimize>=0.8.1
 matplotlib
 torch