[CI] Improve Docs CI Efficiency #3587

Merged · 9 commits · Feb 15, 2025
3 changes: 3 additions & 0 deletions .github/workflows/execute-notebook.yml
@@ -36,6 +36,9 @@ jobs:
run: |
bash scripts/ci_install_dependency.sh
pip install -r docs/requirements.txt
apt-get update
apt-get install -y pandoc
apt-get update && apt-get install -y parallel

- name: Setup Jupyter Kernel
run: |
4 changes: 4 additions & 0 deletions .github/workflows/pr-test.yml
@@ -8,13 +8,15 @@ on:
- "python/sglang/**"
- "test/**"
- "docs/**"
- "scripts/**"
pull_request:
branches: [ main ]
paths:
- "python/pyproject.toml"
- "python/sglang/**"
- "test/**"
- "docs/**"
- "scripts/**"
workflow_dispatch:
inputs:
version:
@@ -45,6 +47,8 @@ jobs:
filters: |
docs:
- 'docs/**'
scripts:
- 'scripts/**'
sglang:
- 'python/sglang/**'
test:
1 change: 1 addition & 0 deletions .github/workflows/release-docs.yml
@@ -32,6 +32,7 @@ jobs:
pip install -r docs/requirements.txt
apt-get update
apt-get install -y pandoc
apt-get update && apt-get install -y parallel

- name: Setup Jupyter Kernel
run: |
46 changes: 27 additions & 19 deletions docs/Makefile
@@ -1,34 +1,42 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the terminal, and also
# from the environment for the first two.
# Minimal Makefile for Sphinx documentation
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build

# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# New target to compile Markdown and Jupyter Notebook files
# Compile Notebook files and record execution time
compile:
find $(SOURCEDIR) -path "*/_build/*" -prune -o -name "*.ipynb" -print | while read nb; do \
if [ -f "$$nb" ]; then \
echo "Executing $$nb"; \
jupyter nbconvert --to notebook --execute --inplace "$$nb" \
--ExecutePreprocessor.timeout=600 \
--ExecutePreprocessor.kernel_name=python3 || exit 1; \
fi; \
done
@set -e; \
echo "Starting Notebook compilation..."; \
mkdir -p logs; \
echo "Notebook execution timings:" > logs/timing.log; \
START_TOTAL=$$(date +%s); \
find $(SOURCEDIR) -path "*/_build/*" -prune -o -name "*.ipynb" -print0 | \
parallel -0 -j3 --halt soon,fail=1 ' \
NB_NAME=$$(basename {}); \
START_TIME=$$(date +%s); \
jupyter nbconvert --to notebook --execute --inplace "{}" \
--ExecutePreprocessor.timeout=600 \
--ExecutePreprocessor.kernel_name=python3; \
RET_CODE=$$?; \
END_TIME=$$(date +%s); \
ELAPSED_TIME=$$((END_TIME - START_TIME)); \
echo "$${NB_NAME}: $${ELAPSED_TIME}s" >> logs/timing.log; \
exit $$RET_CODE' || exit 1; \
END_TOTAL=$$(date +%s); \
TOTAL_ELAPSED=$$((END_TOTAL - START_TOTAL)); \
echo "---------------------------------" >> logs/timing.log; \
echo "Total execution time: $${TOTAL_ELAPSED}s" >> logs/timing.log; \
echo "All Notebook execution timings:" && cat logs/timing.log


.PHONY: help Makefile compile
.PHONY: help Makefile compile clean

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

clean:
rm -rf $(BUILDDIR)/*
rm -rf $(BUILDDIR)/* logs/timing.log
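The rewritten compile target leans on GNU parallel: three notebooks execute at once (-j3), --halt soon,fail=1 stops scheduling new notebooks after the first failure, and each run's wall time is appended to logs/timing.log. For readers unfamiliar with parallel, a rough Python equivalent of that logic is sketched below; it is illustrative only (the CI runs the Makefile above), and unlike --halt soon,fail=1 it finishes every notebook before reporting failure.

import pathlib
import subprocess
import time
from concurrent.futures import ThreadPoolExecutor

def run_notebook(nb: pathlib.Path):
    """Execute one notebook in place and return (name, seconds, exit code)."""
    start = time.time()
    proc = subprocess.run(
        [
            "jupyter", "nbconvert", "--to", "notebook", "--execute",
            "--inplace", str(nb),
            "--ExecutePreprocessor.timeout=600",
            "--ExecutePreprocessor.kernel_name=python3",
        ]
    )
    return nb.name, time.time() - start, proc.returncode

# Same file selection as the Makefile: every *.ipynb outside _build/.
notebooks = [p for p in pathlib.Path(".").rglob("*.ipynb") if "_build" not in p.parts]

with ThreadPoolExecutor(max_workers=3) as pool:  # parallel -j3
    results = list(pool.map(run_notebook, notebooks))

for name, seconds, code in results:
    print(f"{name}: {seconds:.0f}s")  # the Makefile writes these to logs/timing.log
if any(code != 0 for _, _, code in results):
    raise SystemExit(1)  # roughly --halt soon,fail=1, minus the early abort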
Empty file added docs/backend/__init__.py
28 changes: 15 additions & 13 deletions docs/backend/function_calling.ipynb
@@ -31,17 +31,19 @@
"source": [
"from openai import OpenAI\n",
"import json\n",
"from sglang.utils import (\n",
" execute_shell_command,\n",
" wait_for_server,\n",
" terminate_process,\n",
" print_highlight,\n",
")\n",
"from sglang.utils import wait_for_server, print_highlight, terminate_process\n",
"from sglang.test.test_utils import is_in_ci\n",
"\n",
"if is_in_ci():\n",
" from patch import launch_server_cmd\n",
Comment on lines +37 to +38

Contributor: 🤔 do we really want to include CI logic in the docs? I am afraid that it might confuse some users, since not everyone knows what CI is.

Collaborator: I think we do not have a better way 😂

"else:\n",
" from sglang.utils import launch_server_cmd\n",
"\n",
"\n",
"server_process = execute_shell_command(\n",
" \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --tool-call-parser llama3 --port 30333 --host 0.0.0.0\" # llama3\n",
"server_process, port = launch_server_cmd(\n",
" \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --tool-call-parser llama3 --host 0.0.0.0\" # llama3\n",
")\n",
"wait_for_server(\"http://localhost:30333\")"
"wait_for_server(f\"http://localhost:{port}\")"
]
},
{
@@ -141,7 +143,7 @@
"outputs": [],
"source": [
"# Initialize OpenAI-like client\n",
"client = OpenAI(api_key=\"None\", base_url=\"http://0.0.0.0:30333/v1\")\n",
"client = OpenAI(api_key=\"None\", base_url=f\"http://0.0.0.0:{port}/v1\")\n",
"model_name = client.models.list().data[0].id"
]
},
@@ -377,13 +379,13 @@
" tools=tools,\n",
")\n",
"\n",
"gen_url = \"http://localhost:30333/generate\"\n",
"gen_url = f\"http://localhost:{port}/generate\"\n",
"gen_data = {\"text\": input, \"sampling_params\": {\"skip_special_tokens\": False}}\n",
"gen_response = requests.post(gen_url, json=gen_data).json()[\"text\"]\n",
"print(gen_response)\n",
"\n",
"# parse the response\n",
"parse_url = \"http://localhost:30333/function_call\"\n",
"parse_url = f\"http://localhost:{port}/function_call\"\n",
"\n",
"function_call_input = {\n",
" \"text\": gen_response,\n",
@@ -403,7 +405,7 @@
"metadata": {},
"outputs": [],
"source": [
"terminate_process(server_process)"
"terminate_process(server_process, port)"
]
},
{
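The patch module imported under is_in_ci() in the review thread above is not part of this diff, so its behavior has to be inferred: it must accept the launch command (now written without a --port flag) and return the process handle plus the chosen port, matching the (server_process, port) unpacking in the notebooks. A minimal hypothetical sketch, assuming it simply grabs a free port and delegates to the existing execute_shell_command helper, could look like this.

# Hypothetical sketch of the CI-only patch.launch_server_cmd; the real module
# is not shown in this diff, so the name choices and behavior are assumptions.
import socket
from sglang.utils import execute_shell_command

def launch_server_cmd(command: str):
    # Bind to port 0 so the OS hands out a free port (assumption: the patch
    # exists to keep parallel CI notebook runs from colliding on fixed ports).
    with socket.socket() as s:
        s.bind(("", 0))
        free_port = s.getsockname()[1]
    process = execute_shell_command(f"{command} --port {free_port}")
    return process, free_port

Whatever the real implementation looks like, the contract the notebooks rely on is just "command string in, (process, port) out".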
81 changes: 45 additions & 36 deletions docs/backend/native_api.ipynb
@@ -34,22 +34,22 @@
"metadata": {},
"outputs": [],
"source": [
"from sglang.utils import (\n",
" execute_shell_command,\n",
" wait_for_server,\n",
" terminate_process,\n",
" print_highlight,\n",
")\n",
"\n",
"import requests\n",
"from sglang.test.test_utils import is_in_ci\n",
"\n",
"server_process = execute_shell_command(\n",
" \"\"\"\n",
"python3 -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --port=30010\n",
"\"\"\"\n",
"if is_in_ci():\n",
" from patch import launch_server_cmd\n",
"else:\n",
" from sglang.utils import launch_server_cmd\n",
"\n",
"from sglang.utils import wait_for_server, print_highlight, terminate_process\n",
"\n",
"\n",
"server_process, port = launch_server_cmd(\n",
" \"python -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --host 0.0.0.0\"\n",
")\n",
"\n",
"wait_for_server(\"http://localhost:30010\")"
"wait_for_server(f\"http://localhost:{port}\")"
]
},
{
@@ -66,7 +66,7 @@
"metadata": {},
"outputs": [],
"source": [
"url = \"http://localhost:30010/generate\"\n",
"url = f\"http://localhost:{port}/generate\"\n",
"data = {\"text\": \"What is the capital of France?\"}\n",
"\n",
"response = requests.post(url, json=data)\n",
@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"url = \"http://localhost:30010/get_model_info\"\n",
"url = f\"http://localhost:{port}/get_model_info\"\n",
"\n",
"response = requests.get(url)\n",
"response_json = response.json()\n",
@@ -123,7 +123,7 @@
"source": [
"# get_server_info\n",
"\n",
"url = \"http://localhost:30010/get_server_info\"\n",
"url = f\"http://localhost:{port}/get_server_info\"\n",
"\n",
"response = requests.get(url)\n",
"print_highlight(response.text)"
@@ -144,7 +144,7 @@
"metadata": {},
"outputs": [],
"source": [
"url = \"http://localhost:30010/health_generate\"\n",
"url = f\"http://localhost:{port}/health_generate\"\n",
"\n",
"response = requests.get(url)\n",
"print_highlight(response.text)"
@@ -156,7 +156,7 @@
"metadata": {},
"outputs": [],
"source": [
"url = \"http://localhost:30010/health\"\n",
"url = f\"http://localhost:{port}/health\"\n",
"\n",
"response = requests.get(url)\n",
"print_highlight(response.text)"
@@ -179,7 +179,7 @@
"source": [
"# flush cache\n",
"\n",
"url = \"http://localhost:30010/flush_cache\"\n",
"url = f\"http://localhost:{port}/flush_cache\"\n",
"\n",
"response = requests.post(url)\n",
"print_highlight(response.text)"
@@ -204,7 +204,7 @@
"source": [
"# successful update with same architecture and size\n",
"\n",
"url = \"http://localhost:30010/update_weights_from_disk\"\n",
"url = f\"http://localhost:{port}/update_weights_from_disk\"\n",
"data = {\"model_path\": \"meta-llama/Llama-3.2-1B\"}\n",
"\n",
"response = requests.post(url, json=data)\n",
@@ -222,7 +222,7 @@
"source": [
"# failed update with different parameter size or wrong name\n",
"\n",
"url = \"http://localhost:30010/update_weights_from_disk\"\n",
"url = f\"http://localhost:{port}/update_weights_from_disk\"\n",
"data = {\"model_path\": \"meta-llama/Llama-3.2-1B-wrong\"}\n",
"\n",
"response = requests.post(url, json=data)\n",
@@ -252,16 +252,16 @@
"metadata": {},
"outputs": [],
"source": [
"terminate_process(server_process)\n",
"terminate_process(server_process, port)\n",
"\n",
"embedding_process = execute_shell_command(\n",
"embedding_process, port = launch_server_cmd(\n",
" \"\"\"\n",
"python -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-7B-instruct \\\n",
" --port 30020 --host 0.0.0.0 --is-embedding\n",
" --host 0.0.0.0 --is-embedding\n",
"\"\"\"\n",
")\n",
"\n",
"wait_for_server(\"http://localhost:30020\")"
"wait_for_server(f\"http://localhost:{port}\")"
]
},
{
@@ -272,14 +272,23 @@
"source": [
"# successful encode for embedding model\n",
"\n",
"url = \"http://localhost:30020/encode\"\n",
"url = f\"http://localhost:{port}/encode\"\n",
"data = {\"model\": \"Alibaba-NLP/gte-Qwen2-7B-instruct\", \"text\": \"Once upon a time\"}\n",
"\n",
"response = requests.post(url, json=data)\n",
"response_json = response.json()\n",
"print_highlight(f\"Text embedding (first 10): {response_json['embedding'][:10]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"terminate_process(embedding_process, port)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -295,18 +304,18 @@
"metadata": {},
"outputs": [],
"source": [
"terminate_process(embedding_process)\n",
"terminate_process(embedding_process, port)\n",
"\n",
"# Note that SGLang now treats embedding models and reward models as the same type of models.\n",
"# This will be updated in the future.\n",
"\n",
"reward_process = execute_shell_command(\n",
"reward_process, port = launch_server_cmd(\n",
" \"\"\"\n",
"python -m sglang.launch_server --model-path Skywork/Skywork-Reward-Llama-3.1-8B-v0.2 --port 30030 --host 0.0.0.0 --is-embedding\n",
"python -m sglang.launch_server --model-path Skywork/Skywork-Reward-Llama-3.1-8B-v0.2 --host 0.0.0.0 --is-embedding\n",
"\"\"\"\n",
")\n",
"\n",
"wait_for_server(\"http://localhost:30030\")"
"wait_for_server(f\"http://localhost:{port}\")"
]
},
{
@@ -332,7 +341,7 @@
"tokenizer = AutoTokenizer.from_pretrained(\"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\")\n",
"prompts = tokenizer.apply_chat_template(CONVS, tokenize=False)\n",
"\n",
"url = \"http://localhost:30030/classify\"\n",
"url = f\"http://localhost:{port}/classify\"\n",
"data = {\"model\": \"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\", \"text\": prompts}\n",
"\n",
"responses = requests.post(url, json=data).json()\n",
Expand All @@ -346,7 +355,7 @@
"metadata": {},
"outputs": [],
"source": [
"terminate_process(reward_process)"
"terminate_process(reward_process, port)"
]
},
{
@@ -364,13 +373,13 @@
"metadata": {},
"outputs": [],
"source": [
"tokenizer_free_server_process = execute_shell_command(\n",
"tokenizer_free_server_process, port = launch_server_cmd(\n",
" \"\"\"\n",
"python3 -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --port=30010 --skip-tokenizer-init\n",
"python3 -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --skip-tokenizer-init\n",
"\"\"\"\n",
")\n",
"\n",
"wait_for_server(\"http://localhost:30010\")"
"wait_for_server(f\"http://localhost:{port}\")"
]
},
{
@@ -390,7 +399,7 @@
"print_highlight(f\"Tokenized Input: {input_tokens}\")\n",
"\n",
"response = requests.post(\n",
" \"http://localhost:30010/generate\",\n",
" f\"http://localhost:{port}/generate\",\n",
" json={\n",
" \"input_ids\": input_tokens,\n",
" \"sampling_params\": {\n",
Expand All @@ -416,7 +425,7 @@
"metadata": {},
"outputs": [],
"source": [
"terminate_process(tokenizer_free_server_process)"
"terminate_process(tokenizer_free_server_process, port)"
]
}
],
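Outside CI, the notebooks now import launch_server_cmd from sglang.utils instead of hard-coding ports through execute_shell_command. Condensed into a standalone script, the launch/query/terminate flow both updated notebooks follow looks roughly like this; the signatures are taken from the diff itself (launch_server_cmd returns a (process, port) pair and terminate_process now also takes the port).

import requests
from sglang.utils import launch_server_cmd, wait_for_server, terminate_process, print_highlight

# Launch the server without a fixed --port; the helper picks one dynamically.
server_process, port = launch_server_cmd(
    "python -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --host 0.0.0.0"
)
wait_for_server(f"http://localhost:{port}")  # block until the server is ready

# Hit the native /generate endpoint on the dynamically assigned port.
response = requests.post(
    f"http://localhost:{port}/generate",
    json={"text": "What is the capital of France?"},
)
print_highlight(response.json())

terminate_process(server_process, port)  # shut the server down and free the port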