Skip to content

Commit

Permalink
Download spaCy English model by default since it's required by many functions
Browse files Browse the repository at this point in the history
  • Loading branch information
hugoabonizio committed Oct 18, 2020
1 parent b29159b commit 159eede
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
10 changes: 9 additions & 1 deletion texthero/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@

import spacy
import pandas as pd
import en_core_web_sm
from nltk.stem import PorterStemmer, SnowballStemmer
from texthero._types import TextSeries, InputSeries

try:
    # If not present, download 'en_core_web_sm'. Importing the model package
    # directly doubles as the "is it installed?" check.
    import en_core_web_sm
except ModuleNotFoundError:
    import importlib

    from spacy.cli.download import download as spacy_download

    spacy_download("en_core_web_sm")
    # The package was installed after this interpreter started; refresh the
    # import system's finder caches so the retry below can see it.
    importlib.invalidate_caches()
    import en_core_web_sm


@InputSeries(TextSeries)
def named_entities(s: TextSeries, package="spacy") -> pd.Series:
Expand Down
1 change: 1 addition & 0 deletions texthero/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ def replace_stopwords(

if stopwords is None:
from texthero import stopwords as _stopwords

stopwords = _stopwords.DEFAULT
return s.apply(_replace_stopwords, args=(stopwords, symbol))

Expand Down
9 changes: 0 additions & 9 deletions texthero/stopwords.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,6 @@
nltk.download("stopwords")

from nltk.corpus import stopwords as nltk_en_stopwords

try:
# If not present, download 'en_core_web_sm'
spacy_model = spacy.load("en_core_web_sm")
except OSError:
from spacy.cli.download import download as spacy_download

spacy_download("en_core_web_sm")

from spacy.lang.en import stop_words as spacy_en_stopwords

DEFAULT = set(nltk_en_stopwords.words("english"))
Expand Down

0 comments on commit 159eede

Please sign in to comment.