From 9a3b359177b12247feff35b319345a20923c5846 Mon Sep 17 00:00:00 2001 From: Faiyaz Hasan Date: Sat, 9 May 2020 18:51:04 -0400 Subject: [PATCH 1/2] Removed Docker from gensim since docker image fails to build and there's nobody to maintain docker --- docker/Dockerfile | 167 ------------------------------- docker/README.md | 21 ---- docker/check_fast_version.py | 10 -- docker/start_jupyter_notebook.sh | 7 -- 4 files changed, 205 deletions(-) delete mode 100644 docker/Dockerfile delete mode 100644 docker/README.md delete mode 100644 docker/check_fast_version.py delete mode 100644 docker/start_jupyter_notebook.sh diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 73dd855a12..0000000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,167 +0,0 @@ -FROM ubuntu:16.04 - -MAINTAINER Parul Sethi - -ENV GENSIM_REPOSITORY https://github.com/RaRe-Technologies/gensim.git -ENV GENSIM_BRANCH develop - -# Installs python, pip and setup tools (with fixed versions) -RUN apt-get update \ - && apt-get install -y \ - ant=1.9.6-1ubuntu1 \ - cmake=3.5.1-1ubuntu3 \ - default-jdk=2:1.8-56ubuntu2 \ - g++=4:5.3.1-1ubuntu1 \ - git=1:2.7.4-0ubuntu1 \ - libboost-all-dev=1.58.0.1ubuntu1 \ - libgsl-dev=2.1+dfsg-2 \ - mercurial=3.7.3-1ubuntu1 \ - python3=3.5.1-3 \ - python3-pip=8.1.1-2ubuntu0.4 \ - python3-setuptools=20.7.0-1 \ - python=2.7.11-1 \ - python-pip=8.1.1-2ubuntu0.4 \ - python-setuptools=20.7.0-1 \ - unzip=6.0-20ubuntu1 \ - wget=1.17.1-1ubuntu1.3 \ - subversion=1.9.3-2ubuntu1.1 \ - locales=2.23-0ubuntu9 \ - libopenblas-dev=0.2.18-1ubuntu1 \ - libboost-program-options-dev=1.58.0.1ubuntu1 \ - zlib1g-dev=1:1.2.8.dfsg-2ubuntu4.1 - -# Setup python language -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LC_CTYPE en_US.UTF-8 -ENV LC_ALL en_US.UTF-8 - -# Upgrade pip -RUN pip2 install --upgrade pip -RUN pip3 install --upgrade pip - -# Install dependencies -RUN pip2 install \ - cython==0.25.2 \ - jupyter==1.0.0 \ - matplotlib==2.0.0 \ - nltk==3.2.2 \ - pandas==0.19.2 \ - spacy==1.8.1 \ - git+https://github.com/mila-udem/blocks.git@7beb788f1fcfc78d56c59a5edf9b4e8d98f8d7d9 \ - -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt - -RUN pip3 install \ - cython==0.25.2 \ - jupyter==1.0.0 \ - matplotlib==2.0.0 \ - nltk==3.2.2 \ - pandas==0.19.2 \ - spacy==1.8.1 \ - git+https://github.com/mila-udem/blocks.git@7beb788f1fcfc78d56c59a5edf9b4e8d98f8d7d9 \ - -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt - -# avoid using old numpy version installed by blocks requirements -RUN pip2 install -U numpy -RUN pip3 install -U numpy - -# Download english model of Spacy -RUN python2 -m spacy download en -RUN python3 -m spacy download en - -# Download gensim from Github -RUN git clone $GENSIM_REPOSITORY \ - && cd /gensim \ - && git checkout $GENSIM_BRANCH \ - && pip2 install .[test] \ - && python2 setup.py install \ - && pip3 install .[test] \ - && python3 setup.py install - -# Create gensim dependencies directory -RUN mkdir /gensim/gensim_dependencies - -# Set ENV variables for wrappers -ENV WR_HOME /gensim/gensim_dependencies/wordrank -ENV FT_HOME /gensim/gensim_dependencies/fastText -ENV MALLET_HOME /gensim/gensim_dependencies/mallet -ENV DTM_PATH /gensim/gensim_dependencies/dtm/dtm/main -ENV VOWPAL_WABBIT_PATH /gensim/gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw - -# For fixed version downloads of gensim wrappers dependencies -ENV WORDRANK_VERSION 44f3f7786f76c79c083dfad9d64e20bacfb4a0b0 -ENV FASTTEXT_VERSION f24a781021862f0e475a5fb9c55b7c1cec3b6e2e -ENV MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION ec2e37a3bcb8bd7b56b75b043c47076bc5decf22 -ENV DTM_VERSION 67139e6f526b2bc33aef56dc36176a1b8b210056 -ENV MALLET_VERSION 2.0.8 -ENV VOWPAL_WABBIT_VERSION 69ecc2847fa0c876c6e0557af409f386f0ced59a - -# Install custom dependencies - -# Install mpich (a wordrank dependency) and remove openmpi to avoid mpirun conflict -RUN apt-get purge -y openmpi-common openmpi-bin libopenmpi1.10 -RUN apt-get install -y mpich - -# Install wordrank -RUN cd /gensim/gensim_dependencies \ - && git clone https://bitbucket.org/shihaoji/wordrank \ - && cd /gensim/gensim_dependencies/wordrank \ - && git checkout $WORDRANK_VERSION \ - && sed -i -e 's/#export CC=gcc CXX=g++/export CC=gcc CXX=g++/g' install.sh \ - && sh ./install.sh - -# Install fastText -RUN cd /gensim/gensim_dependencies \ - && git clone https://github.com/facebookresearch/fastText.git \ - && cd /gensim/gensim_dependencies/fastText \ - && git checkout $FASTTEXT_VERSION \ - && make - -# Install MorphologicalPriorsForWordEmbeddings -RUN cd /gensim/gensim_dependencies \ - && git clone https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings.git \ - && cd /gensim/gensim_dependencies/MorphologicalPriorsForWordEmbeddings \ - && git checkout $MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION - -# Install DTM -RUN cd /gensim/gensim_dependencies \ - && git clone https://github.com/blei-lab/dtm.git \ - && cd /gensim/gensim_dependencies/dtm/dtm \ - && git checkout $DTM_VERSION \ - && make - -# Install Mallet -RUN mkdir /gensim/gensim_dependencies/mallet \ - && mkdir /gensim/gensim_dependencies/download \ - && cd /gensim/gensim_dependencies/download \ - && wget --quiet http://mallet.cs.umass.edu/dist/mallet-$MALLET_VERSION.zip \ - && unzip mallet-$MALLET_VERSION.zip \ - && mv ./mallet-$MALLET_VERSION/* /gensim/gensim_dependencies/mallet \ - && rm -rf /gensim/gensim_dependencies/download \ - && cd /gensim/gensim_dependencies/mallet \ - && ant - -# Install Vowpal wabbit -RUN cd /gensim/gensim_dependencies \ - && git clone https://github.com/JohnLangford/vowpal_wabbit.git \ - && cd /gensim/gensim_dependencies/vowpal_wabbit \ - && git checkout $VOWPAL_WABBIT_VERSION \ - && make \ - && make install - -# Start gensim - -# Fix ipython kernel version -RUN ipython2 kernel install -RUN ipython3 kernel install - -# Run check script -RUN python2 /gensim/docker/check_fast_version.py -RUN python3 /gensim/docker/check_fast_version.py - -# Add running permission to startup script -RUN chmod +x /gensim/docker/start_jupyter_notebook.sh - -# Define the starting command for this container and expose its running port -CMD sh -c '/gensim/docker/start_jupyter_notebook.sh 9000' -EXPOSE 9000 diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index c577413a87..0000000000 --- a/docker/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# Build gensim image - -In docker directory run the following command to build the image locally: - -``` -docker build -t gensim . -``` - -# Run ipython notebook with installed gensim - -Just execute: - -``` -docker run -p 9000:9000 gensim -``` - -# Run the interactive bash mode - -``` -docker run -it gensim /bin/bash -``` diff --git a/docker/check_fast_version.py b/docker/check_fast_version.py deleted file mode 100644 index 958c59852d..0000000000 --- a/docker/check_fast_version.py +++ /dev/null @@ -1,10 +0,0 @@ -import sys - -try: - from gensim.models.word2vec_inner import FAST_VERSION - - print('FAST_VERSION ok ! Retrieved with value ', FAST_VERSION) - sys.exit() -except ImportError: - print('Failed... fall back to plain numpy (20-80x slower training than the above)') - sys.exit(-1) diff --git a/docker/start_jupyter_notebook.sh b/docker/start_jupyter_notebook.sh deleted file mode 100644 index 7893536dd6..0000000000 --- a/docker/start_jupyter_notebook.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -PORT=$1 -NOTEBOOK_DIR=/gensim/docs/notebooks -DEFAULT_URL=/notebooks/gensim%20Quick%20Start.ipynb - -jupyter notebook --no-browser --ip=* --port=${PORT} --allow-root --notebook-dir=${NOTEBOOK_DIR} --NotebookApp.token=\"\" --NotebookApp.default_url=${DEFAULT_URL} From 044531a089d4b50f65e16efe50dbf67e9e7bf065 Mon Sep 17 00:00:00 2001 From: Faiyaz Hasan Date: Sun, 10 May 2020 16:31:05 -0400 Subject: [PATCH 2/2] Remove irrelevant comment about docker --- gensim/models/wrappers/wordrank.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/gensim/models/wrappers/wordrank.py b/gensim/models/wrappers/wordrank.py index 946787506f..ce13fc4599 100644 --- a/gensim/models/wrappers/wordrank.py +++ b/gensim/models/wrappers/wordrank.py @@ -41,11 +41,6 @@ >>> model = Wordrank.train(path_to_wordrank_binary, corpus_file='text8', out_name='wr_model') >>> >>> print(model["hello"]) # prints vector for given words - -Warnings --------- -Note that the wrapper might not work in a docker container for large datasets due to memory limits (caused by MPI). - """ from __future__ import division