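# ----------------------------------------------------------------------------
# -- Dockerfile for Neural Baby Talk: a PyTorch 0.4 / CUDA 9 / cuDNN 7 image
# -- with code, data, and pretrained-model dependencies baked in.
# --
# -- Example usage (a sketch; the image tag and GPU runtime flag are
# -- assumptions, not part of this file):
# --   docker build -t neuralbabytalk .
# --   docker run --rm -it --runtime=nvidia -p 8888:8888 neuralbabytalk
# ----------------------------------------------------------------------------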
FROM pytorch/pytorch:0.4-cuda9-cudnn7-devel
COPY . /workspace/neuralbabytalk
# ----------------------------------------------------------------------------
# -- install apt and pip dependencies
# ----------------------------------------------------------------------------
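# openjdk-8-jdk provides the Java runtime needed by the Stanford CoreNLP jar
# and the coco-caption evaluation tools fetched later in this file.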
RUN apt-get update && \
    apt-get install -y \
        ant \
        ca-certificates-java \
        nano \
        openjdk-8-jdk \
        python2.7 \
        unzip \
        wget && \
    apt-get clean
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
RUN update-ca-certificates -f
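# Cython is installed in a separate step first because pycocotools compiles
# against it at install time; the trailing python -c step downloads NLTK's
# 'punkt' tokenizer models used for caption tokenization.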
RUN pip install Cython && pip install h5py \
        matplotlib \
        nltk \
        numpy \
        pycocotools \
        scikit-image \
        stanfordcorenlp \
        tensorflow \
        torchtext \
        tqdm && \
    python -c "import nltk; nltk.download('punkt')"
# ----------------------------------------------------------------------------
# -- download pretrained imagenet weights for resnet-101
# ----------------------------------------------------------------------------
RUN mkdir /workspace/neuralbabytalk/data/imagenet_weights && \
    cd /workspace/neuralbabytalk/data/imagenet_weights && \
    wget --quiet https://www.dropbox.com/sh/67fc8n6ddo3qp47/AAACkO4QntI0RPvYic5voWHFa/resnet101.pth
# ----------------------------------------------------------------------------
# -- download Karpathy's preprocessed captions datasets and corenlp jar
# ----------------------------------------------------------------------------
RUN cd /workspace/neuralbabytalk/data && \
    wget --quiet http://cs.stanford.edu/people/karpathy/deepimagesent/caption_datasets.zip && \
    unzip caption_datasets.zip && \
    mv dataset_coco.json coco/ && \
    mv dataset_flickr30k.json flickr30k/ && \
    rm caption_datasets.zip dataset_flickr8k.json
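# The preprocessing scripts talk to this CoreNLP distribution through the
# stanfordcorenlp Python wrapper installed above.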
RUN cd /workspace/neuralbabytalk/prepro && \
    wget --quiet https://nlp.stanford.edu/software/stanford-corenlp-full-2017-06-09.zip && \
    unzip stanford-corenlp-full-2017-06-09.zip && \
    rm stanford-corenlp-full-2017-06-09.zip
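# get_stanford_models.sh fetches the Stanford CoreNLP models that the
# coco-caption evaluation code (its SPICE metric) depends on.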
RUN cd /workspace/neuralbabytalk/tools/coco-caption && \
    sh get_stanford_models.sh
# ----------------------------------------------------------------------------
# -- download preprocessed COCO detection output HDF file and pretrained model
# ----------------------------------------------------------------------------
RUN cd /workspace/neuralbabytalk/data/coco && \
    wget --quiet https://www.dropbox.com/s/2gzo4ops5gbjx5h/coco_detection.h5.tar.gz && \
    tar -xzvf coco_detection.h5.tar.gz && \
    rm coco_detection.h5.tar.gz
RUN mkdir -p /workspace/neuralbabytalk/save && \
    cd /workspace/neuralbabytalk/save && \
    wget --quiet https://www.dropbox.com/s/6buajkxm9oed1jp/coco_nbt_1024.tar.gz && \
    tar -xzvf coco_nbt_1024.tar.gz && \
    rm coco_nbt_1024.tar.gz
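# ----------------------------------------------------------------------------
# -- build dictionary and caption files for the normal, robust, and noc splits
# ----------------------------------------------------------------------------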
WORKDIR /workspace/neuralbabytalk
RUN python prepro/prepro_dic_coco.py \
        --input_json data/coco/dataset_coco.json \
        --split normal \
        --output_dic_json data/coco/dic_coco.json \
        --output_cap_json data/coco/cap_coco.json && \
    python prepro/prepro_dic_coco.py \
        --input_json data/coco/dataset_coco.json \
        --split robust \
        --output_dic_json data/robust_coco/dic_coco.json \
        --output_cap_json data/robust_coco/cap_coco.json && \
    python prepro/prepro_dic_coco.py \
        --input_json data/coco/dataset_coco.json \
        --split noc \
        --output_dic_json data/noc_coco/dic_coco.json \
        --output_cap_json data/noc_coco/cap_coco.json
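# Port 8888 is exposed for an interactive server (e.g. Jupyter); nothing is
# started automatically, so launch one inside the container as needed.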
EXPOSE 8888