#!/bin/bash

# Paths: SteerLM reward-model checkpoint on the host and its mount point inside the container
HOST_MODEL_DIR="/data0/MODELS/nvidia/Llama3-70B-SteerLM-RM"
CONTAINER_MODEL_DIR="/models/nvidia"

# NeMo framework Docker image
DOCKER_IMAGE="nvcr.io/nvidia/nemo:24.01.framework"

# Inference server port
INFERENCE_PORT=1424

# Launch the container and drop into an interactive shell; the reward-model
# server is started manually inside (see the commented commands below).
# Notes:
#   - GPUs 4 and 5 are exposed to the container.
#   - `-v data:/workspace/data` mounts a Docker named volume called `data`;
#     use an absolute host path instead to bind-mount a local directory.
#   - NCCL debug logging is enabled, and NCCL is pinned to TCP over eth0 with
#     InfiniBand disabled and peer-to-peer transfers restricted.
#   - HTTP(S) traffic inside the container is routed through a local proxy on port 8001.
docker run --gpus '"device=4,5"' \
    --runtime=nvidia \
    --rm \
    -it \
    --name lyt_nemo \
    --network=host \
    -v "$HOST_MODEL_DIR":"$CONTAINER_MODEL_DIR" \
    -v data:/workspace/data \
    -v "$HF_HOME":/workspace/hf \
    -e NCCL_DEBUG=INFO \
    -e NCCL_DEBUG_SUBSYS=ALL \
    -e NCCL_SOCKET_IFNAME=eth0 \
    -e NCCL_IB_DISABLE=1 \
    -e NCCL_P2P_LEVEL=1 \
    -e http_proxy="http://localhost:8001" \
    -e https_proxy="http://localhost:8001" \
    -e no_proxy="localhost,127.0.0.1" \
    --shm-size=200g \
    "$DOCKER_IMAGE" \
    /bin/bash
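
# Optional sanity check once inside the container (a minimal sketch; assumes
# the usual NVIDIA runtime tooling is available in the image):
# nvidia-smi               # should list only the two GPUs passed via --gpus
# ls -lh /models/nvidia    # confirm the SteerLM checkpoint is visible at the mount point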

# Inside the container: serve the Llama3-70B SteerLM reward model across the
# two visible GPUs (tensor parallel 1, pipeline parallel 2).
# export CONTAINER_MODEL_DIR="/models/nvidia"
# export HF_HOME=/workspace/hf
# export INFERENCE_PORT=1424
# python /opt/NeMo-Aligner/examples/nlp/gpt/serve_reward_model.py \
#     rm_model_file=$CONTAINER_MODEL_DIR \
#     trainer.num_nodes=1 \
#     trainer.devices=2 \
#     ++model.tensor_model_parallel_size=1 \
#     ++model.pipeline_model_parallel_size=2 \
#     inference.micro_batch_size=2 \
#     inference.port=$INFERENCE_PORT
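
# Once the server logs that it is ready, a quick liveness probe (a sketch;
# assumes the reward model is served through PyTriton, which exposes the
# standard Triton HTTP health endpoint on $INFERENCE_PORT):
# curl -sf localhost:$INFERENCE_PORT/v2/health/ready && echo "reward-model server is up"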

# Annotate a custom benchmark file with SteerLM attribute scores using the
# reward-model server listening on port 1424.
# python /opt/NeMo-Aligner/examples/nlp/data/steerlm/attribute_annotate.py \
#     --input-file=data/our-bench/step5_v5_chat_factual_construct.jsonl \
#     --output-file=data/our-bench/step5_v5_chat_factual_construct_labeled.jsonl \
#     --port=1424
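
# Spot-check the annotated output (a sketch; the exact attribute fields in the
# labeled JSONL depend on the annotate script and are not assumed here):
# head -n 1 data/our-bench/step5_v5_chat_factual_construct_labeled.jsonl | python -m json.tool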

# Alternative: serve the smaller Llama2-13B SteerLM reward model on a single
# GPU, running the server directly as the container command (no interactive
# shell). This assumes the Llama2-13B checkpoint is present under $HOST_MODEL_DIR.
# docker run --gpus '"device=0"' \
#     --rm \
#     --name nemo_inference_server \
#     -p $INFERENCE_PORT:$INFERENCE_PORT \
#     -v $HOST_MODEL_DIR:$CONTAINER_MODEL_DIR \
#     $DOCKER_IMAGE \
#     python /opt/NeMo-Aligner/examples/nlp/gpt/serve_reward_model.py \
#     rm_model_file=$CONTAINER_MODEL_DIR/Llama2-13B-SteerLM-RM.nemo \
#     trainer.num_nodes=1 \
#     trainer.devices=1 \
#     ++model.tensor_model_parallel_size=1 \
#     ++model.pipeline_model_parallel_size=1 \
#     inference.micro_batch_size=2 \
#     inference.port=$INFERENCE_PORT

# Prepare the OpenAssistant (OASST) dataset and annotate it with SteerLM
# attributes via the running reward-model server.
# python /opt/NeMo-Aligner/examples/nlp/data/steerlm/preprocess_openassistant_data.py --output_directory=data/oasst
# python /opt/NeMo-Aligner/examples/nlp/data/steerlm/attribute_annotate.py \
#     --input-file=data/oasst/train.jsonl \
#     --output-file=data/oasst/train_labeled.jsonl \
#     --port=1424
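
# Quick completeness check (a sketch): the labeled file should contain one line
# per input record if annotation finished without dropping samples.
# wc -l data/oasst/train.jsonl data/oasst/train_labeled.jsonl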