# 1. Base system
FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
SHELL ["/bin/bash", "-c"]
# TODO: change these for your GPU:
# SM=86 / TORCH_CUDA_ARCH_LIST="8.6" for RTX 3090 Ti; SM=80 / "8.0" for A100 80GB.
ENV SM=80
ENV TORCH_CUDA_ARCH_LIST="8.0"
ENV NUM_GPUS=8
ENV PORT=59048
ENV PATH=/opt/conda/bin:/usr/local/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs/:/usr/lib/x86_64-linux-gnu:/usr/local/cuda-12.2/compat/:/usr/local/cuda-12.2/targets/x86_64-linux/lib/stubs:$LD_LIBRARY_PATH
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
# ARG CODE_VERSION=latest
# ARG BUILDPLATFORM BUILDOS BUILDARCH
ENV CONDA_OS=Linux
ENV CONDA_ARCH=x86_64
# BUILDPLATFORM: linux/amd64
# BUILDARCH: amd64, arm64, riscv64
# BUILDOS: linux # TODO: echo this on macOS.
# Environment Variables
ENV BUILD_DIR=/tmp
ENV PYTORCH_VERSION=2.1.0
# Base System utilities.
RUN apt update && apt full-upgrade -y && \
    apt install -y build-essential yasm cmake libtool libc6 libc6-dev unzip wget libnuma1 libnuma-dev git pkg-config
# 2. Install Miniconda
# RUN CONDA_ARCH="${BUILDARCH}"; \
# if [ "${CONDA_ARCH}" = "amd64" ]; then CONDA_ARCH=x86_64; fi; \
# CONDA_OS="${BUILDOS}"; \
# if [ "${CONDA_OS}" = "linux" ]; then CONDA_OS=Linux; fi; \
RUN wget -nv --show-progress --progress=bar:force:noscroll https://repo.anaconda.com/miniconda/Miniconda3-latest-${CONDA_OS}-${CONDA_ARCH}.sh -O Miniconda3-latest.sh
RUN chmod +x ./Miniconda3-latest.sh
RUN ./Miniconda3-latest.sh -b -p /opt/conda && rm ./Miniconda3-latest.sh
RUN conda init bash
# RUN source /root/.bashrc
RUN conda update --all -y
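# Optional sanity check: the conda install above should now provide the default
# python and pip for every later build step.
RUN which python && python --version && conda --version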
# 3. Install system cuSparseLt and NCCL
# (CUDA 12.2 and cuDNN 8 are already provided by the base image.)
RUN apt install -y libcusparselt0 libcusparselt-dev libnccl2 libnccl-dev
# 4. Build FFmpeg with NVIDIA codec support
# 4.1 Install NVCODEC headers
RUN echo "Start installing NVCODEC headers."
WORKDIR /tmp
RUN git clone https://git.videolan.org/git/ffmpeg/nv-codec-headers.git
WORKDIR /tmp/nv-codec-headers
# No need for CONDA_PREFIX since we are root in Docker.
RUN make install && echo "Finished installing NVCODEC headers."
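# Optional check: nv-codec-headers installs to PREFIX=/usr/local by default, so
# the ffnvcodec headers should now be where ffmpeg's configure will look.
RUN test -f /usr/local/include/ffnvcodec/nvEncodeAPI.h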
# 4.2 Build ffmpeg
RUN apt install -y libx264-dev libgnutls28-dev
WORKDIR /tmp
RUN git clone https://git.ffmpeg.org/ffmpeg.git
WORKDIR /tmp/ffmpeg
RUN ./configure \
--extra-cflags='-I/usr/local/cuda/include' \
--extra-ldflags='-L/usr/local/cuda/lib64' \
--nvccflags="-gencode arch=compute_${SM},code=sm_${SM} -O2" \
--disable-doc \
--enable-decoder=aac \
--enable-decoder=h264 \
--enable-decoder=h264_cuvid \
--enable-decoder=rawvideo \
--enable-indev=lavfi \
--enable-encoder=libx264 \
--enable-encoder=h264_nvenc \
--enable-demuxer=mov \
--enable-muxer=mp4 \
--enable-filter=scale \
--enable-filter=testsrc2 \
--enable-protocol=file \
--enable-protocol=https \
--enable-gnutls \
--enable-shared \
--enable-gpl \
--enable-nonfree \
--enable-cuda-nvcc \
--enable-libx264 \
--enable-nvenc \
--enable-cuvid \
--disable-postproc \
--disable-static \
--enable-nvdec
RUN make clean
RUN make -j
RUN make install
RUN echo '/usr/local/lib' >> /etc/ld.so.conf
RUN ldconfig
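# Optional check before the smoke test below: fail the build early if the NVENC
# encoder or CUVID decoder did not get compiled in (grep exits nonzero if absent).
RUN ffmpeg -hide_banner -encoders | grep h264_nvenc && \
    ffmpeg -hide_banner -decoders | grep h264_cuvid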
ENV VIDEO_URL="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4"
RUN /usr/local/bin/ffmpeg -hide_banner -y -vsync 0 \
-hwaccel cuvid \
-hwaccel_output_format cuda \
-c:v h264_cuvid \
-resize 360x240 \
-i "${VIDEO_URL}" \
-c:a copy \
-c:v h264_nvenc \
-b:v 5M test.mp4
RUN rm test.mp4
# 5. Build PyTorch
# 5.1 Install Intel basekit
# TODO: update intel gpg for apt sources lists.
# RUN apt install intel-basekit
# 5.2 Install conda deps
RUN echo "Installing PyTorch dependencies"
RUN conda install -c pytorch magma-cuda121 -y # TODO: include magma-cuda122 building in the future
# RUN conda install -c intel -c defaults cmake ninja astunparse expecttest hypothesis psutil pyyaml requests setuptools typing-extensions sympy filelock networkx jinja2 fsspec packaging -y
RUN conda install cmake ninja astunparse expecttest hypothesis psutil pyyaml requests setuptools typing-extensions sympy filelock networkx jinja2 fsspec packaging -y
RUN conda install -c defaults -c conda-forge jinja2 types-dataclasses optree -y # NOTE: jinja2 needs to be >= 3.1.2, so at the time of writing, cannot be from -c intel.
# 5.3 Install PyTorch from source
WORKDIR ${BUILD_DIR}
RUN git clone --recursive --single-branch --branch v${PYTORCH_VERSION} https://github.com/pytorch/pytorch.git
# Sync submodules
WORKDIR ${BUILD_DIR}/pytorch
RUN git submodule sync
RUN git submodule update --init --recursive
# RUN conda activate vtom
ENV _GLIBCXX_USE_CXX11_ABI=1
# ENV does not perform shell command substitution, so the original
# ${CONDA_PREFIX:-"$(dirname $(which conda))/../"} would have been taken as a
# literal string; point CMake at the conda root installed above instead.
ENV CMAKE_PREFIX_PATH=/opt/conda
ENV USE_FFMPEG=1
# ENV USE_TBB=1
# ENV USE_SYSTEM_TBB=1
ENV USE_SYSTEM_NCCL=1
# RUN # TODO: ONNX_USE_PROTOBUF_SHARED_LIBS
# RUN # TODO: XNNPACK enabled shared
RUN echo "Start building pytorch"
# `setup.py develop` would leave torch importable only from this source tree,
# which is deleted after the build; `install` copies it into site-packages.
RUN python setup.py clean && python setup.py install > install_pytorch.log 2>&1
RUN echo "DONE building pytorch"
RUN pip install tqdm gradio matplotlib sentencepiece protobuf transformers tokenizers huggingface_hub accelerate
WORKDIR ${BUILD_DIR}
RUN rm -rf pytorch
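# Sanity-check the fresh torch build from outside the (now deleted) source tree.
# No GPU is visible during a plain `docker build`, so is_available() is expected
# to print False here; the check only proves the CUDA build imports cleanly.
RUN python -c 'import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())'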
# 6. Install decord
WORKDIR ${BUILD_DIR}
RUN git clone --recursive https://github.com/zhanwenchen/decord
WORKDIR ${BUILD_DIR}/decord
RUN mkdir build
WORKDIR ${BUILD_DIR}/decord/build
RUN cmake .. -DUSE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=${SM} -DCMAKE_BUILD_TYPE=Release
RUN make -j
WORKDIR ${BUILD_DIR}/decord/python
RUN python setup.py install --user
RUN rm -rf build
# Cleanup
WORKDIR ${BUILD_DIR}
RUN rm -rf decord
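# Optional import check, run from outside the deleted source tree so the
# installed copy (not the local package) is picked up.
RUN python -c 'import decord; print(decord.__version__)'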
# 7. Install flash-attention
RUN pip install ninja einops
WORKDIR ${BUILD_DIR}
RUN git clone --single-branch --branch v2.3.2 https://github.com/Dao-AILab/flash-attention.git
WORKDIR ${BUILD_DIR}/flash-attention
RUN MAX_JOBS=4 python setup.py install
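# Drop the source tree like the pytorch and decord steps above, then run an
# optional import check from outside it (the CUDA extension should import
# without a visible GPU; it just cannot launch kernels at build time).
WORKDIR ${BUILD_DIR}
RUN rm -rf flash-attention
RUN python -c 'import flash_attn; print(flash_attn.__version__)'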
# Clean up packages:
RUN conda clean --all -y
RUN pip cache purge
# 8. Install vtom
WORKDIR /workspace
RUN git clone https://github.com/zhanwenchen/vtom.git
WORKDIR /workspace/vtom
# `module load` is an HPC environment-modules command and does not exist in this
# image; install the AWS CLI with pip instead.
RUN pip install awscli
WORKDIR /workspace/vtom/data
RUN aws s3 cp s3://vtom/video_chatgpt_training_removed.json video_chatgpt_training_removed.json
RUN aws s3 cp s3://vtom/ActivityNet_Train_Video-ChatGPT_Clip-L14_Features.zip ActivityNet_Train_Video-ChatGPT_Clip-L14_Features.zip
RUN unzip ./ActivityNet_Train_Video-ChatGPT_Clip-L14_Features.zip -d ./clip_features_train
RUN rm ActivityNet_Train_Video-ChatGPT_Clip-L14_Features.zip
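# Quick check (assumes the zip unpacks the features directly into this folder):
RUN test -d ./clip_features_train && ls ./clip_features_train | head -n 5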
# 9. Extract models
WORKDIR /workspace/vtom
# p7zip is not installed by the earlier apt step, so install it here; `x` (not
# `e`) preserves the archive's directory structure, which the training step's
# --model_name_or_path expects.
RUN apt install -y p7zip-full
RUN 7z x ./LLaVA-7B-Lightening-v1-1-delta.7z
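# Quick check, assuming the archive contains a top-level
# LLaVA-7B-Lightening-v1-1-delta/ directory as the training step expects:
RUN test -d ./LLaVA-7B-Lightening-v1-1-delta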
# 10. Run training
WORKDIR /workspace/vtom
RUN PYTHONPATH="./:$PYTHONPATH" torchrun --nproc_per_node=${NUM_GPUS} --master_port ${PORT} video_chatgpt/train/train_mem.py \
--model_name_or_path ./LLaVA-7B-Lightening-v1-1-delta \
--version v1 \
--data_path ./data/video_chatgpt_training_removed.json \
--video_folder ./data/clip_features_train \
--tune_mm_mlp_adapter True \
--mm_use_vid_start_end \
--bf16 True \
--output_dir ./Video-ChatGPT_7B-1.1_Checkpoints \
--num_train_epochs 3 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--gradient_accumulation_steps 1 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 3000 \
--save_total_limit 3 \
--learning_rate 2e-5 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 100 \
--tf32 True \
--model_max_length 2048 \
--gradient_checkpointing True \
--lazy_preprocess True
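# Usage sketch (tag and flags are illustrative, not from this repo):
#   docker build -t vtom .
#   docker run --gpus all --shm-size=16g -it vtom bash
# Note that the torchrun invocation above executes at image-build time, so the
# build host itself needs ${NUM_GPUS} visible GPUs for that layer to succeed.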
# RUN echo "Installing PyTorch dependencies"
# RUN conda install -c pytorch magma-cuda121 -y # TODO: include magma-cuda122 building in the future
# # RUN conda install -c intel -c defaults cmake ninja astunparse expecttest hypothesis psutil pyyaml requests setuptools typing-extensions sympy filelock networkx jinja2 fsspec packaging -y
# RUN conda install cmake ninja astunparse expecttest hypothesis psutil pyyaml requests setuptools typing-extensions sympy filelock networkx jinja2 fsspec packaging -y
# RUN conda install -c defaults -c conda-forge jinja2 types-dataclasses optree -y # NOTE: jinja2 needs to be >= 3.1.2, so at the time of writing, cannot be from -c intel.
# # # 4.3 Install PyTorch from source
# WORKDIR ${BUILD_DIR}
# RUN git clone --recursive --single-branch --branch v${PYTORCH_VERSION} https://github.com/pytorch/pytorch.git
# # # 1. sync submodules
# WORKDIR ${BUILD_DIR}/pytorch
# RUN git submodule sync
# RUN git submodule update --init --recursive
# # RUN conda activate vtom
# ENV _GLIBCXX_USE_CXX11_ABI=1
# ENV CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
# ENV USE_FFMPEG=1
# # ENV USE_TBB=1
# # ENV USE_SYSTEM_TBB=1
# ENV USE_SYSTEM_NCCL=1
# # RUN # TODO: ONNX_USE_PROTOBUF_SHARED_LIBS
# # RUN # TODO: XNNPACK enabled shared
# RUN echo "Start building pytorch"
# RUN python setup.py clean && python setup.py develop > install_pytorch.log 2>&1
# RUN echo "DONE building pytorch"
# RUN pip install tqdm gradio matplotlib sentencepiece protobuf transformers tokenizers huggingface_hub accelerate
# WORKDIR ${BUILD_DIR}
# RUN rm -rf pytorch
# RUN python -c 'import torch; print(torch.cuda.is_available())'
# # 4. Install decord
# WORKDIR ${BUILD_DIR}
# RUN git clone --recursive https://github.com/zhanwenchen/decord
# WORKDIR ${BUILD_DIR}/decord
# RUN mkdir build
# WORKDIR ${BUILD_DIR}/decord/build
# RUN cmake .. -DUSE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=${SM} -DCMAKE_BUILD_TYPE=Release
# RUN make -j
# WORKDIR ${BUILD_DIR}/decord/python
# RUN python setup.py install --user
# # Cleanup
# WORKDIR ${BUILD_DIR}
# RUN rm -rf decord
# # # 6. Install flash-attention
# RUN pip install ninja einops
# WORKDIR ${BUILD_DIR}
# RUN git clone --single-branch --branch v2.3.2 https://github.com/Dao-AILab/flash-attention.git
# WORKDIR ${BUILD_DIR}flash-attention
# RUN MAX_JOBS=4 python setup.py install
# # Clean up packages:
# RUN conda clean --all -y
# RUN pip cache purge
# # Install vtom
# WORKDIR /workspace
# RUN git clone https://github.com/zhanwenchen/vtom.git
# WORKDIR /workspace/vtom
# RUN module load awscli
# # RUN pip install --user awscli
# WORKDIR /workspace/vtom/data
# RUN aws s3 cp s3://vtom/video_chatgpt_training_removed.json video_chatgpt_training_removed.json
# RUN aws s3 cp s3://vtom/ActivityNet_Train_Video-ChatGPT_Clip-L14_Features.zip ActivityNet_Train_Video-ChatGPT_Clip-L14_Features.zip
# RUN unzip ./ActivityNet_Train_Video-ChatGPT_Clip-L14_Features.zip -d ./clip_features_train
# RUN rm ActivityNet_Train_Video-ChatGPT_Clip-L14_Features.zip
# # Download models
# WORKDIR /workspace/vtom
# RUN 7z e ./LLaVA-7B-Lightening-v1-1-delta.7z
# # Run training
# WORKDIR /workspace/vtom
# RUN PYTHONPATH="./:$PYTHONPATH" torchrun --nproc_per_node=${NUM_GPUS} --master_port ${PORT} video_chatgpt/train/train_mem.py \
# --model_name_or_path ./LLaVA-7B-Lightening-v1-1-delta \
# --version v1 \
# --data_path video_chatgpt_training_removed.json \
# --video_folder ./data/clip_features_train \
# --tune_mm_mlp_adapter True \
# --mm_use_vid_start_end \
# --bf16 True \
# --output_dir ./Video-ChatGPT_7B-1.1_Checkpoints \
# --num_train_epochs 3 \
# --per_device_train_batch_size 1 \
# --per_device_eval_batch_size 1 \
# --gradient_accumulation_steps 1 \
# --evaluation_strategy "no" \
# --save_strategy "steps" \
# --save_steps 3000 \
# --save_total_limit 3 \
# --learning_rate 2e-5 \
# --weight_decay 0. \
# --warmup_ratio 0.03 \
# --lr_scheduler_type "cosine" \
# --logging_steps 100 \
# --tf32 True \
# --model_max_length 2048 \
# --gradient_checkpointing True \
# --lazy_preprocess True