Skip to content

Commit

Permalink
Update NLTK model downloads, closes #760
Browse files: browse the repository at this point in the history
  • Loading branch information
davidmezzetti committed Aug 16, 2024
1 parent 6eb5ceb commit a014c3b
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
run: |
pip install -U wheel
pip install .[all,dev] fasttext==0.9.2 faiss-cpu==1.8.0 "numpy < 2.0.0"
- python -c "import nltk; nltk.download('punkt')"
+ python -c "import nltk; nltk.download(['punkt', 'punkt_tab', 'averaged_perceptron_tagger_eng'])"
python --version
make data coverage
Expand Down
2 changes: 1 addition & 1 deletion docker/base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ RUN \
python -m pip install --no-cache-dir -U pip wheel setuptools && \
if [ -z ${GPU} ] && { [ -z ${TARGETARCH} ] || [ ${TARGETARCH} = "amd64" ] ;}; then pip install --no-cache-dir torch==2.3.1+cpu torchvision==0.18.1+cpu -f https://download.pytorch.org/whl/torch_stable.html; fi && \
python -m pip install --no-cache-dir txtai${COMPONENTS} && \
- python -c "import sys, importlib.util as util; 1 if util.find_spec('nltk') else sys.exit(); import nltk; nltk.download('punkt')" && \
+ python -c "import sys, importlib.util as util; 1 if util.find_spec('nltk') else sys.exit(); import nltk; nltk.download(['punkt', 'punkt_tab', 'averaged_perceptron_tagger_eng'])" && \
\
# Cleanup build packages
apt-get -y purge gcc g++ python${PYTHON_VERSION}-dev && apt-get -y autoremove
Expand Down
2 changes: 1 addition & 1 deletion examples/10_Extract_text_from_documents.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
"\n",
"# Install NLTK\n",
"import nltk\n",
- "nltk.download('punkt')"
+ "nltk.download(['punkt', 'punkt_tab'])"
],
"execution_count": 19,
"outputs": []
Expand Down
6 changes: 5 additions & 1 deletion examples/40_Text_to_Speech_Generation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@
},
"source": [
"%%capture\n",
- "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline] onnxruntime-gpu librosa"
+ "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline] onnxruntime-gpu librosa\n",
+ "\n",
+ "# Install NLTK\n",
+ "import nltk\n",
+ "nltk.download('averaged_perceptron_tagger_eng')"
],
"execution_count": 1,
"outputs": []
Expand Down
2 changes: 1 addition & 1 deletion examples/52_Build_RAG_pipelines_with_txtai.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"\n",
"# Install NLTK\n",
"import nltk\n",
- "nltk.download('punkt')"
+ "nltk.download(['punkt', 'punkt_tab'])"
]
},
{
Expand Down

0 comments on commit a014c3b

Please sign in to comment.