Skip to content

Commit eb317cd

Browse files
authored
fix: dev target needs special permission, now VLLM version works for run.sh and Dev Container (#2822)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com> Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
1 parent 27fad26 commit eb317cd

File tree

4 files changed

+140
-54
lines changed

4 files changed

+140
-54
lines changed

.devcontainer/devcontainer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"name": "NVIDIA Dynamo Dev Container Development",
88
"remoteUser": "ubuntu", // Matches our container user
99
"updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors
10-
"image": "dynamo:latest-vllm", // Use the latest VLLM local dev image
10+
"image": "dynamo:latest-vllm-dev", // Use the latest VLLM dev image
1111
"runArgs": [
1212
"--gpus=all",
1313
"--network=host",

container/Dockerfile.vllm

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,13 @@ CMD []
296296

297297
FROM runtime AS dev
298298

299-
# Install utilities
299+
# Don't want ubuntu to be editable, just change uid and gid.
300+
ENV USERNAME=ubuntu
301+
ARG USER_UID
302+
ARG USER_GID
303+
ARG WORKSPACE_DIR=/workspace
304+
305+
# Install utilities as root
300306
RUN apt-get update -y && \
301307
apt-get install -y --no-install-recommends \
302308
# Install utilities
@@ -325,46 +331,59 @@ RUN apt-get update -y && \
325331
protobuf-compiler && \
326332
rm -rf /var/lib/apt/lists/*
327333

328-
# Rust environment setup
329-
ENV RUSTUP_HOME=/usr/local/rustup \
330-
CARGO_HOME=/usr/local/cargo \
331-
CARGO_TARGET_DIR=/opt/dynamo/target \
332-
PATH=/usr/local/cargo/bin:$PATH
333-
334-
COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
335-
COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME
334+
COPY --from=runtime /usr/local/bin /usr/local/bin
336335

337336
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
338337
# Will use the default ubuntu user, but give sudo access
339338
# Needed so files permissions aren't set to root ownership when writing from inside container
340-
341-
# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
342-
ENV USERNAME=ubuntu
343-
ARG USER_UID=1000
344-
ARG USER_GID=1000
345-
346339
RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
347340
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
348341
&& chmod 0440 /etc/sudoers.d/$USERNAME \
349342
&& mkdir -p /home/$USERNAME \
343+
&& groupmod -g $USER_GID $USERNAME \
344+
&& usermod -u $USER_UID -g $USER_GID $USERNAME \
350345
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
351346
&& rm -rf /var/lib/apt/lists/* \
352347
&& chsh -s /bin/bash $USERNAME
353348

349+
# At this point, we are executing as the ubuntu user
350+
USER $USERNAME
351+
ENV HOME=/home/$USERNAME
352+
WORKDIR $HOME
353+
354+
# Set workspace directory variable
355+
ENV WORKSPACE_DIR=${WORKSPACE_DIR}
356+
357+
# Development environment variables for the dev target
358+
# Path configuration notes:
359+
# - DYNAMO_HOME: Main project directory (workspace mount point)
360+
# - CARGO_TARGET_DIR: Build artifacts in workspace/target for persistence
361+
# - CARGO_HOME: Must be in $HOME/.cargo (not workspace) because:
362+
# * Workspace gets mounted to different paths where cargo binaries may not exist
363+
# * Contains critical cargo binaries and registry that need consistent paths
364+
# - RUSTUP_HOME: Must be in $HOME/.rustup (not workspace) because:
365+
# * Contains rust toolchain binaries that must be at expected system paths
366+
# * Workspace mount point would break rustup's toolchain resolution
367+
# - PATH: Includes cargo binaries for rust tool access
368+
ENV DYNAMO_HOME=${WORKSPACE_DIR}
369+
ENV CARGO_TARGET_DIR=${WORKSPACE_DIR}/target
370+
ENV CARGO_HOME=${HOME}/.cargo
371+
ENV RUSTUP_HOME=${HOME}/.rustup
372+
ENV PATH=${CARGO_HOME}/bin:$PATH
373+
374+
COPY --from=dynamo_base --chown=$USER_UID:$USER_GID /usr/local/rustup $RUSTUP_HOME
375+
COPY --from=dynamo_base --chown=$USER_UID:$USER_GID /usr/local/cargo $CARGO_HOME
376+
354377
# This is a slow operation (~40s on my cpu)
355378
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
356379
COPY --from=runtime --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
357-
RUN chown $USERNAME:$USERNAME ${VIRTUAL_ENV}
358-
COPY --from=runtime --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
359380

360381
# so we can use maturin develop
361382
RUN uv pip install maturin[patchelf]
362383

363-
USER $USERNAME
364-
ENV HOME=/home/$USERNAME
365-
ENV PYTHONPATH=$PYTHONPATH:$HOME/dynamo/components/planner/src
366-
ENV CARGO_TARGET_DIR=$HOME/dynamo/.build/target
367-
WORKDIR $HOME
384+
# Make sure to sync this with the one specified on README.md.
385+
# This is a generic PYTHONPATH which works for all the frameworks, so some paths may not be relevant for this particular framework.
386+
ENV PYTHONPATH=${WORKSPACE_DIR}/components/metrics/src:${WORKSPACE_DIR}/components/frontend/src:${WORKSPACE_DIR}/components/planner/src:${WORKSPACE_DIR}/components/backends/mocker/src:${WORKSPACE_DIR}/components/backends/trtllm/src:${WORKSPACE_DIR}/components/backends/vllm/src:${WORKSPACE_DIR}/components/backends/sglang/src:${WORKSPACE_DIR}/components/backends/llama_cpp/src
368387

369388
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
370389
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \

container/build.sh

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ set -e
2424
TAG=
2525
RUN_PREFIX=
2626
PLATFORM=linux/amd64
27+
USER_UID=
28+
USER_GID=
2729

2830
# Get short commit hash
2931
commit_id=$(git rev-parse --short HEAD)
@@ -243,6 +245,22 @@ get_options() {
243245
missing_requirement "$1"
244246
fi
245247
;;
248+
--uid)
249+
if [ "$2" ]; then
250+
USER_UID="$2"
251+
shift
252+
else
253+
missing_requirement "$1"
254+
fi
255+
;;
256+
--gid)
257+
if [ "$2" ]; then
258+
USER_GID="$2"
259+
shift
260+
else
261+
missing_requirement "$1"
262+
fi
263+
;;
246264
--dry-run)
247265
RUN_PREFIX="echo"
248266
echo ""
@@ -419,6 +437,8 @@ show_help() {
419437
echo " [--cache-from cache location to start from]"
420438
echo " [--cache-to location where to cache the build output]"
421439
echo " [--tag tag for image]"
440+
echo " [--uid user ID for dev target (default: current user)]"
441+
echo " [--gid group ID for dev target (default: current group)]"
422442
echo " [--no-cache disable docker build cache]"
423443
echo " [--dry-run print docker commands without running]"
424444
echo " [--build-context name=path to add build context]"
@@ -443,6 +463,13 @@ error() {
443463

444464
get_options "$@"
445465

466+
# Validate UID/GID flags are only used with dev target
467+
if [ -n "$USER_UID" ] || [ -n "$USER_GID" ]; then
468+
if [[ "$TARGET" != "dev" ]]; then
469+
echo "⚠️ Warning: --uid and --gid flags are only effective with --target dev"
470+
echo " Current target: ${TARGET:-}"
471+
fi
472+
fi
446473

447474
# Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64
448475
ARCH="amd64"
@@ -466,7 +493,14 @@ fi
466493
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
467494

468495
if [[ $TARGET == "dev" ]]; then
469-
BUILD_ARGS+=" --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) "
496+
# Use provided UID/GID or default to current user
497+
if [ -z "$USER_UID" ]; then
498+
USER_UID=$(id -u)
499+
fi
500+
if [ -z "$USER_GID" ]; then
501+
USER_GID=$(id -g)
502+
fi
503+
BUILD_ARGS+=" --build-arg USER_UID=$USER_UID --build-arg USER_GID=$USER_GID "
470504
fi
471505

472506
# BUILD DEV IMAGE
@@ -599,8 +633,6 @@ if [ "$USE_SCCACHE" = true ]; then
599633
BUILD_ARGS+=" --build-arg USE_SCCACHE=true"
600634
BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}"
601635
BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
602-
603-
604636
fi
605637

606638
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"

container/run.sh

Lines changed: 63 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@ DEFAULT_FRAMEWORK=VLLM
3131
SOURCE_DIR=$(dirname "$(readlink -f "$0")")
3232

3333
IMAGE=
34+
TARGET="dev"
3435
HF_CACHE=
3536
DEFAULT_HF_CACHE=${SOURCE_DIR}/.cache/huggingface
3637
GPUS="all"
3738
PRIVILEGED=
3839
VOLUME_MOUNTS=
3940
MOUNT_WORKSPACE=
41+
DEV_MODE=
4042
ENVIRONMENT_VARIABLES=
4143
REMAINING_ARGS=
4244
INTERACTIVE=
@@ -162,6 +164,7 @@ get_options() {
162164
--mount-workspace)
163165
MOUNT_WORKSPACE=TRUE
164166
;;
167+
165168
--use-nixl-gds)
166169
USE_NIXL_GDS=TRUE
167170
;;
@@ -227,32 +230,11 @@ get_options() {
227230
ENTRYPOINT_STRING="--entrypoint ${ENTRYPOINT}"
228231
fi
229232

230-
if [ -n "$MOUNT_WORKSPACE" ]; then
231-
VOLUME_MOUNTS+=" -v ${SOURCE_DIR}/..:/workspace "
232-
VOLUME_MOUNTS+=" -v /tmp:/tmp "
233-
VOLUME_MOUNTS+=" -v /mnt/:/mnt "
234-
235-
if [ -z "$HF_CACHE" ]; then
236-
HF_CACHE=$DEFAULT_HF_CACHE
237-
fi
238-
239-
if [ -z "${PRIVILEGED}" ]; then
240-
PRIVILEGED="TRUE"
241-
fi
242-
243-
ENVIRONMENT_VARIABLES+=" -e HF_TOKEN"
244-
245-
INTERACTIVE=" -it "
246-
fi
247-
248233
if [[ ${HF_CACHE^^} == "NONE" ]]; then
249234
HF_CACHE=
250235
fi
251236

252-
if [ -n "$HF_CACHE" ]; then
253-
mkdir -p "$HF_CACHE"
254-
VOLUME_MOUNTS+=" -v $HF_CACHE:/root/.cache/huggingface"
255-
fi
237+
# HF_CACHE mounting will be handled in workspace section
256238

257239
if [ -z "${PRIVILEGED}" ]; then
258240
PRIVILEGED="FALSE"
@@ -262,9 +244,9 @@ get_options() {
262244
RM="TRUE"
263245
fi
264246

265-
if [[ ${PRIVILEGED^^} == "FALSE" ]]; then
266-
PRIVILEGED_STRING=""
267-
else
247+
# Initialize PRIVILEGED_STRING
248+
PRIVILEGED_STRING=""
249+
if [[ ${PRIVILEGED^^} != "FALSE" ]]; then
268250
PRIVILEGED_STRING="--privileged"
269251
fi
270252

@@ -277,7 +259,6 @@ get_options() {
277259
if [ -n "$USE_NIXL_GDS" ]; then
278260
VOLUME_MOUNTS+=" -v /run/udev:/run/udev:ro "
279261
NIXL_GDS_CAPS="--cap-add=IPC_LOCK"
280-
281262
# NOTE(jthomson04): In the KVBM disk pools, we currently allocate our files in /tmp.
282263
# For some arcane reason, GDS requires that /tmp be mounted.
283264
# This is already handled for us if we set --mount-workspace
@@ -291,14 +272,25 @@ get_options() {
291272
if [[ "$GPUS" == "none" || "$GPUS" == "NONE" ]]; then
292273
RUNTIME=""
293274
fi
275+
276+
# Auto-enable DEV_MODE for vllm dev images
277+
# TODO(keivenc): Currently only Dockerfile.vllm has proper permissions to run as ubuntu user.
278+
# Other Dockerfiles (trtllm, sglang, etc.) still require root access.
279+
if [[ "$IMAGE" == *"-vllm-dev" ]]; then
280+
DEV_MODE=TRUE
281+
MOUNT_WORKSPACE=TRUE
282+
# Interactive mode is implied when MOUNT_WORKSPACE is TRUE
283+
fi
284+
294285
REMAINING_ARGS=("$@")
295286
}
296287

297288
show_help() {
298289
echo "usage: run.sh"
299290
echo " [--image image]"
300291
echo " [--framework framework one of ${!FRAMEWORKS[*]}]"
301-
echo " [--name name for launched container, default NONE] "
292+
echo " [--target target stage to use, default is 'dev']"
293+
echo " [--name name for launched container, default NONE]"
302294
echo " [--privileged whether to launch in privileged mode, default FALSE unless mounting workspace]"
303295
echo " [--dry-run print docker commands without running]"
304296
echo " [--hf-cache directory to volume mount as the hf cache, default is NONE unless mounting workspace]"
@@ -310,6 +302,8 @@ show_help() {
310302
echo " [-- stop processing and pass remaining args as command to docker run]"
311303
echo " [--workdir set the working directory inside the container]"
312304
echo " [--runtime add runtime variables]"
305+
echo " [--entrypoint override container entrypoint]"
306+
echo " [-h, --help show this help]"
313307
exit 0
314308
}
315309

@@ -324,8 +318,49 @@ error() {
324318

325319
get_options "$@"
326320

327-
# RUN the image
321+
# Process workspace mounting after auto-detection
322+
if [ -n "$MOUNT_WORKSPACE" ]; then
323+
HOME_PATH="/home/ubuntu"
324+
325+
# Common workspace setup
326+
VOLUME_MOUNTS+=" -v $(dirname "${SOURCE_DIR}"):/workspace "
327+
VOLUME_MOUNTS+=" -v /tmp:/tmp "
328+
VOLUME_MOUNTS+=" -v /mnt/:/mnt "
329+
WORKDIR=/workspace
330+
INTERACTIVE=" -it "
331+
332+
# Set default HF_CACHE if not specified
333+
if [ -z "$HF_CACHE" ]; then
334+
HF_CACHE=$DEFAULT_HF_CACHE
335+
fi
328336

337+
# Environment variables for all workspace modes
338+
ENVIRONMENT_VARIABLES+=" -e HF_TOKEN"
339+
ENVIRONMENT_VARIABLES+=" -e GITHUB_TOKEN"
340+
ENVIRONMENT_VARIABLES+=" -e HOME=$HOME_PATH"
341+
342+
# Mount HF_CACHE to user's home cache directory
343+
if [ -n "$HF_CACHE" ]; then
344+
mkdir -p "$HF_CACHE"
345+
VOLUME_MOUNTS+=" -v $HF_CACHE:$HOME_PATH/.cache/huggingface"
346+
fi
347+
348+
if [ -n "$DEV_MODE" ]; then
349+
# Dev Container-specific setup - the Dockerfile handles UID/GID mapping via build args
350+
# This currently only works with Dockerfile.vllm which has proper ubuntu user setup.
351+
echo "Dev Container mode enabled - using ubuntu user with host UID/GID"
352+
# Use ubuntu user (with correct UID/GID baked into image)
353+
PRIVILEGED_STRING+=" --user ubuntu"
354+
else
355+
# Standard workspace mode - enable privileged mode
356+
# TODO(keivenc): Security risk, remove soon. Dockerfiles (trtllm, sglang) still need to run as root.
357+
if [ -z "${PRIVILEGED}" ]; then
358+
PRIVILEGED_STRING="--privileged"
359+
fi
360+
fi
361+
fi
362+
363+
# RUN the image
329364
if [ -z "$RUN_PREFIX" ]; then
330365
set -x
331366
fi

0 commit comments

Comments
 (0)