forked from JohnSnowLabs/spark-nlp-workshop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
77 lines (63 loc) · 2.04 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#Download base image ubuntu 18.04
FROM ubuntu:18.04
ENV NB_USER jovyan
ENV NB_UID 1000
ENV HOME /home/${NB_USER}
ENV PYSPARK_PYTHON=python3
ENV PYSPARK_DRIVER_PYTHON=python3
RUN apt-get update && apt-get install -y \
tar \
wget \
bash \
rsync \
gcc \
libfreetype6-dev \
libhdf5-serial-dev \
libpng-dev \
libzmq3-dev \
python3 \
python3-dev \
python3-pip \
unzip \
pkg-config \
software-properties-common \
graphviz
RUN adduser --disabled-password \
--gecos "Default user" \
--uid ${NB_UID} \
${NB_USER}
# Install OpenJDK-8
RUN apt-get update && \
apt-get install -y openjdk-8-jdk && \
apt-get install -y ant && \
apt-get clean;
# Fix certificate issues
RUN apt-get update && \
apt-get install ca-certificates-java && \
apt-get clean && \
update-ca-certificates -f;
# Setup JAVA_HOME -- useful for docker commandline
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
RUN export JAVA_HOME
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ~/.bashrc
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
RUN pip3 install --upgrade pip
RUN pip3 install --no-cache-dir notebook==5.* numpy pyspark==2.4.4 spark-nlp==2.5.4 pandas mlflow Keras scikit-spark scikit-learn scipy matplotlib pydot tensorflow==1.15.0 graphviz
RUN wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/spark-nlp-resources/glove.6B.100d.zip && \
mkdir -p /home/jovyan/data/embeddings/ && \
unzip glove.6B.100d.zip -d /home/jovyan/data/embeddings && \
rm glove.6B.100d.zip
# Make sure the contents of our repo are in ${HOME}
RUN mkdir -p /home/jovyan/tutorials
RUN mkdir -p /home/jovyan/jupyter
COPY data ${HOME}/data
COPY jupyter ${HOME}/jupyter
COPY tutorials ${HOME}/tutorials
RUN jupyter notebook --generate-config
COPY jupyter_notebook_config.json /home/jovyan/.jupyter/jupyter_notebook_config.json
USER root
RUN chown -R ${NB_UID} ${HOME}
USER ${NB_USER}
WORKDIR ${HOME}
# Specify the default command to run
CMD ["jupyter", "notebook", "--ip", "0.0.0.0"]