Skip to content

Commit d0959d0

Browse files
keivenchangdillon-cullinan
authored andcommitted
fix: dev target needs special permission, now VLLM version works for run.sh and Dev Container (#2822)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com> Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
1 parent 4d54532 commit d0959d0

File tree

4 files changed

+140
-52
lines changed

4 files changed

+140
-52
lines changed

.devcontainer/devcontainer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"name": "NVIDIA Dynamo Dev Container Development",
88
"remoteUser": "ubuntu", // Matches our container user
99
"updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors
10-
"image": "dynamo:latest-vllm", // Use the latest VLLM local dev image
10+
"image": "dynamo:latest-vllm-dev", // Use the latest VLLM dev image
1111
"runArgs": [
1212
"--gpus=all",
1313
"--network=host",

container/Dockerfile.vllm

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,13 @@ CMD []
300300

301301
FROM runtime AS dev
302302

303-
# Install utilities
303+
# Don't want ubuntu to be editable, just change uid and gid.
304+
ENV USERNAME=ubuntu
305+
ARG USER_UID
306+
ARG USER_GID
307+
ARG WORKSPACE_DIR=/workspace
308+
309+
# Install utilities as root
304310
RUN apt-get update -y && \
305311
apt-get install -y --no-install-recommends \
306312
# Install utilities
@@ -329,46 +335,59 @@ RUN apt-get update -y && \
329335
protobuf-compiler && \
330336
rm -rf /var/lib/apt/lists/*
331337

332-
# Rust environment setup
333-
ENV RUSTUP_HOME=/usr/local/rustup \
334-
CARGO_HOME=/usr/local/cargo \
335-
CARGO_TARGET_DIR=/opt/dynamo/target \
336-
PATH=/usr/local/cargo/bin:$PATH
337-
338-
COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
339-
COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME
338+
COPY --from=runtime /usr/local/bin /usr/local/bin
340339

341340
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
342341
# Will use the default ubuntu user, but give sudo access
343342
# Needed so files permissions aren't set to root ownership when writing from inside container
344-
345-
# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
346-
ENV USERNAME=ubuntu
347-
ARG USER_UID=1000
348-
ARG USER_GID=1000
349-
350343
RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
351344
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
352345
&& chmod 0440 /etc/sudoers.d/$USERNAME \
353346
&& mkdir -p /home/$USERNAME \
347+
&& groupmod -g $USER_GID $USERNAME \
348+
&& usermod -u $USER_UID -g $USER_GID $USERNAME \
354349
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
355350
&& rm -rf /var/lib/apt/lists/* \
356351
&& chsh -s /bin/bash $USERNAME
357352

353+
# At this point, we are executing as the ubuntu user
354+
USER $USERNAME
355+
ENV HOME=/home/$USERNAME
356+
WORKDIR $HOME
357+
358+
# Set workspace directory variable
359+
ENV WORKSPACE_DIR=${WORKSPACE_DIR}
360+
361+
# Development environment variables for the dev target
362+
# Path configuration notes:
363+
# - DYNAMO_HOME: Main project directory (workspace mount point)
364+
# - CARGO_TARGET_DIR: Build artifacts in workspace/target for persistence
365+
# - CARGO_HOME: Must be in $HOME/.cargo (not workspace) because:
366+
# * Workspace gets mounted to different paths where cargo binaries may not exist
367+
# * Contains critical cargo binaries and registry that need consistent paths
368+
# - RUSTUP_HOME: Must be in $HOME/.rustup (not workspace) because:
369+
# * Contains rust toolchain binaries that must be at expected system paths
370+
# * Workspace mount point would break rustup's toolchain resolution
371+
# - PATH: Includes cargo binaries for rust tool access
372+
ENV DYNAMO_HOME=${WORKSPACE_DIR}
373+
ENV CARGO_TARGET_DIR=${WORKSPACE_DIR}/target
374+
ENV CARGO_HOME=${HOME}/.cargo
375+
ENV RUSTUP_HOME=${HOME}/.rustup
376+
ENV PATH=${CARGO_HOME}/bin:$PATH
377+
378+
COPY --from=dynamo_base --chown=$USER_UID:$USER_GID /usr/local/rustup $RUSTUP_HOME
379+
COPY --from=dynamo_base --chown=$USER_UID:$USER_GID /usr/local/cargo $CARGO_HOME
380+
358381
# This is a slow operation (~40s on my cpu)
359382
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
360383
COPY --from=runtime --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
361-
RUN chown $USERNAME:$USERNAME ${VIRTUAL_ENV}
362-
COPY --from=runtime --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
363384

364385
# so we can use maturin develop
365386
RUN uv pip install maturin[patchelf]
366387

367-
USER $USERNAME
368-
ENV HOME=/home/$USERNAME
369-
ENV PYTHONPATH=$PYTHONPATH:$HOME/dynamo/components/planner/src
370-
ENV CARGO_TARGET_DIR=$HOME/dynamo/.build/target
371-
WORKDIR $HOME
388+
# Make sure to sync this with the one specified on README.md.
389+
# This is a generic PYTHONPATH which works for all the frameworks, so some paths may not be relevant for this particular framework.
390+
ENV PYTHONPATH=${WORKSPACE_DIR}/components/metrics/src:${WORKSPACE_DIR}/components/frontend/src:${WORKSPACE_DIR}/components/planner/src:${WORKSPACE_DIR}/components/backends/mocker/src:${WORKSPACE_DIR}/components/backends/trtllm/src:${WORKSPACE_DIR}/components/backends/vllm/src:${WORKSPACE_DIR}/components/backends/sglang/src:${WORKSPACE_DIR}/components/backends/llama_cpp/src
372391

373392
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
374393
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \

container/build.sh

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ set -e
2424
TAG=
2525
RUN_PREFIX=
2626
PLATFORM=linux/amd64
27+
USER_UID=
28+
USER_GID=
2729

2830
# Get short commit hash
2931
commit_id=$(git rev-parse --short HEAD)
@@ -243,6 +245,22 @@ get_options() {
243245
missing_requirement "$1"
244246
fi
245247
;;
248+
--uid)
249+
if [ "$2" ]; then
250+
USER_UID="$2"
251+
shift
252+
else
253+
missing_requirement "$1"
254+
fi
255+
;;
256+
--gid)
257+
if [ "$2" ]; then
258+
USER_GID="$2"
259+
shift
260+
else
261+
missing_requirement "$1"
262+
fi
263+
;;
246264
--dry-run)
247265
RUN_PREFIX="echo"
248266
echo ""
@@ -422,6 +440,8 @@ show_help() {
422440
echo " [--cache-from cache location to start from]"
423441
echo " [--cache-to location where to cache the build output]"
424442
echo " [--tag tag for image]"
443+
echo " [--uid user ID for dev target (default: current user)]"
444+
echo " [--gid group ID for dev target (default: current group)]"
425445
echo " [--no-cache disable docker build cache]"
426446
echo " [--dry-run print docker commands without running]"
427447
echo " [--build-context name=path to add build context]"
@@ -450,6 +470,13 @@ error() {
450470

451471
get_options "$@"
452472

473+
# Validate UID/GID flags are only used with dev target
474+
if [ -n "$USER_UID" ] || [ -n "$USER_GID" ]; then
475+
if [[ "$TARGET" != "dev" ]]; then
476+
echo "⚠️ Warning: --uid and --gid flags are only effective with --target dev"
477+
echo " Current target: ${TARGET:-}"
478+
fi
479+
fi
453480

454481
# Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64
455482
ARCH="amd64"
@@ -473,7 +500,14 @@ fi
473500
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
474501

475502
if [[ $TARGET == "dev" ]]; then
476-
BUILD_ARGS+=" --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) "
503+
# Use provided UID/GID or default to current user
504+
if [ -z "$USER_UID" ]; then
505+
USER_UID=$(id -u)
506+
fi
507+
if [ -z "$USER_GID" ]; then
508+
USER_GID=$(id -g)
509+
fi
510+
BUILD_ARGS+=" --build-arg USER_UID=$USER_UID --build-arg USER_GID=$USER_GID "
477511
fi
478512

479513
# BUILD DEV IMAGE

container/run.sh

Lines changed: 63 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@ DEFAULT_FRAMEWORK=VLLM
3131
SOURCE_DIR=$(dirname "$(readlink -f "$0")")
3232

3333
IMAGE=
34+
TARGET="dev"
3435
HF_CACHE=
3536
DEFAULT_HF_CACHE=${SOURCE_DIR}/.cache/huggingface
3637
GPUS="all"
3738
PRIVILEGED=
3839
VOLUME_MOUNTS=
3940
MOUNT_WORKSPACE=
41+
DEV_MODE=
4042
ENVIRONMENT_VARIABLES=
4143
REMAINING_ARGS=
4244
INTERACTIVE=
@@ -162,6 +164,7 @@ get_options() {
162164
--mount-workspace)
163165
MOUNT_WORKSPACE=TRUE
164166
;;
167+
165168
--use-nixl-gds)
166169
USE_NIXL_GDS=TRUE
167170
;;
@@ -227,32 +230,11 @@ get_options() {
227230
ENTRYPOINT_STRING="--entrypoint ${ENTRYPOINT}"
228231
fi
229232

230-
if [ -n "$MOUNT_WORKSPACE" ]; then
231-
VOLUME_MOUNTS+=" -v ${SOURCE_DIR}/..:/workspace "
232-
VOLUME_MOUNTS+=" -v /tmp:/tmp "
233-
VOLUME_MOUNTS+=" -v /mnt/:/mnt "
234-
235-
if [ -z "$HF_CACHE" ]; then
236-
HF_CACHE=$DEFAULT_HF_CACHE
237-
fi
238-
239-
if [ -z "${PRIVILEGED}" ]; then
240-
PRIVILEGED="TRUE"
241-
fi
242-
243-
ENVIRONMENT_VARIABLES+=" -e HF_TOKEN"
244-
245-
INTERACTIVE=" -it "
246-
fi
247-
248233
if [[ ${HF_CACHE^^} == "NONE" ]]; then
249234
HF_CACHE=
250235
fi
251236

252-
if [ -n "$HF_CACHE" ]; then
253-
mkdir -p "$HF_CACHE"
254-
VOLUME_MOUNTS+=" -v $HF_CACHE:/root/.cache/huggingface"
255-
fi
237+
# HF_CACHE mounting will be handled in workspace section
256238

257239
if [ -z "${PRIVILEGED}" ]; then
258240
PRIVILEGED="FALSE"
@@ -262,9 +244,9 @@ get_options() {
262244
RM="TRUE"
263245
fi
264246

265-
if [[ ${PRIVILEGED^^} == "FALSE" ]]; then
266-
PRIVILEGED_STRING=""
267-
else
247+
# Initialize PRIVILEGED_STRING
248+
PRIVILEGED_STRING=""
249+
if [[ ${PRIVILEGED^^} != "FALSE" ]]; then
268250
PRIVILEGED_STRING="--privileged"
269251
fi
270252

@@ -277,7 +259,6 @@ get_options() {
277259
if [ -n "$USE_NIXL_GDS" ]; then
278260
VOLUME_MOUNTS+=" -v /run/udev:/run/udev:ro "
279261
NIXL_GDS_CAPS="--cap-add=IPC_LOCK"
280-
281262
# NOTE(jthomson04): In the KVBM disk pools, we currently allocate our files in /tmp.
282263
# For some arcane reason, GDS requires that /tmp be mounted.
283264
# This is already handled for us if we set --mount-workspace
@@ -291,14 +272,25 @@ get_options() {
291272
if [[ "$GPUS" == "none" || "$GPUS" == "NONE" ]]; then
292273
RUNTIME=""
293274
fi
275+
276+
# Auto-enable DEV_MODE for vllm dev images
277+
# TODO(keivenc): Currently only Dockerfile.vllm has proper permissions to run as ubuntu user.
278+
# Other Dockerfiles (trtllm, sglang, etc.) still require root access.
279+
if [[ "$IMAGE" == *"-vllm-dev" ]]; then
280+
DEV_MODE=TRUE
281+
MOUNT_WORKSPACE=TRUE
282+
# Interactive mode is implied when MOUNT_WORKSPACE is TRUE
283+
fi
284+
294285
REMAINING_ARGS=("$@")
295286
}
296287

297288
show_help() {
298289
echo "usage: run.sh"
299290
echo " [--image image]"
300291
echo " [--framework framework one of ${!FRAMEWORKS[*]}]"
301-
echo " [--name name for launched container, default NONE] "
292+
echo " [--target target stage to use, default is 'dev']"
293+
echo " [--name name for launched container, default NONE]"
302294
echo " [--privileged whether to launch in privileged mode, default FALSE unless mounting workspace]"
303295
echo " [--dry-run print docker commands without running]"
304296
echo " [--hf-cache directory to volume mount as the hf cache, default is NONE unless mounting workspace]"
@@ -310,6 +302,8 @@ show_help() {
310302
echo " [-- stop processing and pass remaining args as command to docker run]"
311303
echo " [--workdir set the working directory inside the container]"
312304
echo " [--runtime add runtime variables]"
305+
echo " [--entrypoint override container entrypoint]"
306+
echo " [-h, --help show this help]"
313307
exit 0
314308
}
315309

@@ -324,8 +318,49 @@ error() {
324318

325319
get_options "$@"
326320

327-
# RUN the image
321+
# Process workspace mounting after auto-detection
322+
if [ -n "$MOUNT_WORKSPACE" ]; then
323+
HOME_PATH="/home/ubuntu"
324+
325+
# Common workspace setup
326+
VOLUME_MOUNTS+=" -v $(dirname "${SOURCE_DIR}"):/workspace "
327+
VOLUME_MOUNTS+=" -v /tmp:/tmp "
328+
VOLUME_MOUNTS+=" -v /mnt/:/mnt "
329+
WORKDIR=/workspace
330+
INTERACTIVE=" -it "
331+
332+
# Set default HF_CACHE if not specified
333+
if [ -z "$HF_CACHE" ]; then
334+
HF_CACHE=$DEFAULT_HF_CACHE
335+
fi
328336

337+
# Environment variables for all workspace modes
338+
ENVIRONMENT_VARIABLES+=" -e HF_TOKEN"
339+
ENVIRONMENT_VARIABLES+=" -e GITHUB_TOKEN"
340+
ENVIRONMENT_VARIABLES+=" -e HOME=$HOME_PATH"
341+
342+
# Mount HF_CACHE to user's home cache directory
343+
if [ -n "$HF_CACHE" ]; then
344+
mkdir -p "$HF_CACHE"
345+
VOLUME_MOUNTS+=" -v $HF_CACHE:$HOME_PATH/.cache/huggingface"
346+
fi
347+
348+
if [ -n "$DEV_MODE" ]; then
349+
# Dev Container-specific setup - the Dockerfile handles UID/GID mapping via build args
350+
# This currently only works with Dockerfile.vllm which has proper ubuntu user setup.
351+
echo "Dev Container mode enabled - using ubuntu user with host UID/GID"
352+
# Use ubuntu user (with correct UID/GID baked into image)
353+
PRIVILEGED_STRING+=" --user ubuntu"
354+
else
355+
# Standard workspace mode - enable privileged mode
356+
# TODO(keivenc): Security risk, remove soon. Dockerfiles (trtllm, sglang) still need to run as root.
357+
if [ -z "${PRIVILEGED}" ]; then
358+
PRIVILEGED_STRING="--privileged"
359+
fi
360+
fi
361+
fi
362+
363+
# RUN the image
329364
if [ -z "$RUN_PREFIX" ]; then
330365
set -x
331366
fi

0 commit comments

Comments
 (0)