-
Notifications
You must be signed in to change notification settings - Fork 118
/
Dockerfile
50 lines (40 loc) · 1.53 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Base image, pinned by tag: pytorch/pytorch 1.5 (cuda10.1 / cudnn7, "devel" variant).
FROM pytorch/pytorch:1.5-cuda10.1-cudnn7-devel

# Use the C.UTF-8 locale for every locale category.
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
# Install the OS-level tools used by later build steps and at runtime.
# - /usr/share/man/man1 is created before installing packages.
#   NOTE(review): presumably a workaround for the default-jdk install scripts
#   expecting that directory to exist; confirm before removing.
# - DEBIAN_FRONTEND is set inline (not via ENV) so package configuration never
#   prompts during the build and the setting does not leak into the runtime env.
# - The apt package index is deleted in the same layer that created it;
#   removing it in a later layer would not shrink the image.
RUN mkdir -p /usr/share/man/man1 && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        cifs-utils \
        curl \
        default-jdk \
        dialog \
        dos2unix \
        git \
        sudo && \
    rm -rf /var/lib/apt/lists/*
# Install app requirements first to avoid invalidating the cache:
# only requirements.txt and setup.py are copied here, so this layer is rebuilt
# only when those two files change rather than on every source edit.
COPY requirements.txt setup.py /app/
WORKDIR /app

# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
# The NLTK 'stopwords' and 'punkt' resources are downloaded at build time so
# they are already present when the container starts.
RUN pip install --no-cache-dir --user -r requirements.txt --no-warn-script-location && \
    pip install --no-cache-dir --user entmax && \
    python -c "import nltk; nltk.download('stopwords'); nltk.download('punkt')"
# Load the pretrained BERT weights once at build time so they are stored in
# the image's model cache instead of being fetched on first use.
RUN python -c \
    "import transformers; transformers.BertModel.from_pretrained('bert-large-uncased-whole-word-masking')"
# Download & cache Stanford CoreNLP, unpacking the archive into /app/third_party.
# The download is streamed straight into `jar x`, so no zip file is left in the layer.
# The pipeline runs under bash with `pipefail` so that a failed download fails
# the build (plain `/bin/sh -c` only reports the exit status of `jar`), and
# curl uses -f to turn HTTP errors into a non-zero exit instead of piping an
# error page into the extractor (-L follows redirects, -sS hides the progress
# meter but still prints errors).
RUN mkdir -p /app/third_party && \
    cd /app/third_party && \
    /bin/bash -o pipefail -c \
        'curl -fsSL https://download.cs.stanford.edu/nlp/software/stanford-corenlp-full-2018-10-05.zip | jar xv'
# Bring in the remainder of the application sources.
COPY . /app/

# Datasets are expected to be mounted as a volume at /mnt/data when the
# container starts. Expose the spider and wikisql subdirectories of that
# volume under /app/data through symlinks.
ENV CACHE_DIR=/mnt/data
RUN mkdir -p /mnt/data /app/data && \
    ln -snf /mnt/data/spider /app/data/spider && \
    ln -snf /mnt/data/wikisql /app/data/wikisql
# Convert all shell scripts under /app to Unix line endings, if any.
# `find` is used instead of a `/app/**/*.sh` glob: without `shopt -s globstar`
# bash treats `**` like `*`, which only matches scripts exactly one directory
# below /app and skips /app/*.sh as well as anything nested deeper.
# `-exec ... {} +` does not run dos2unix at all when no script is found.
RUN find /app -type f -name '*.sh' -exec dos2unix {} +
# Extend PYTHONPATH to load WikiSQL dependencies
ENV PYTHONPATH="/app/third_party/wikisql/:${PYTHONPATH}"

# Start bash by default. The exec (JSON-array) form runs bash directly rather
# than as a child of `/bin/sh -c`, so bash receives signals from `docker stop`
# and any arguments passed to `docker run <image> ...` are forwarded to it.
ENTRYPOINT ["bash"]