Update templates after v0.5.8 llmforge release #391
Changes from 6 commits
@@ -1,4 +1,4 @@
 name: "dspy-llmforge-fine-tuning-job"
 entrypoint: "llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml"
 working_dir: "."
-image_uri: "localhost:5555/anyscale/llm-forge:0.5.7"
+image_uri: "localhost:5555/anyscale/llm-forge:0.5.8"
@@ -1,6 +1,6 @@
 name: e2e-llm-workflows
 entrypoint: llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml
-image_uri: localhost:5555/anyscale/llm-forge:0.5.7
+image_uri: localhost:5555/anyscale/llm-forge:0.5.8
 requirements: []
 max_retries: 1
 excludes: ["assets"]
@@ -45,6 +45,16 @@ logger:
 worker_resources:
   anyscale/accelerator_shape:4xA10G: 0.001

+# Liger kernel configuration
+liger_kernel:
+  enabled: True
+  kwargs:
+    rms_norm: False
+    rope: True
+    swiglu: True
+    cross_entropy: True
+    fused_linear_cross_entropy: False
+
 # Lora configuration
 lora_config:
   r: 8

Review comment: Make a comment on why flc (fused_linear_cross_entropy) is false, or why rms_norm is false.
@@ -41,6 +41,16 @@ deepspeed:
 worker_resources:
   anyscale/accelerator_shape:4xA10G: 0.001

+# Liger kernel configuration
+liger_kernel:
+  enabled: True
+  kwargs:
+    rms_norm: True
+    rope: True
+    swiglu: True
+    cross_entropy: True
+    fused_linear_cross_entropy: False
+
 # Lora configuration
 lora_config:
   r: 8

Review comment: @erictang000 umm did this value change? why was this false again?
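Note: the two Liger blocks differ only in rms_norm (False in the first config, True in this one); the review comments above ask for that discrepancy, and for fused_linear_cross_entropy staying False, to be explained in the configs.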
@@ -111,7 +111,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 2,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -306,7 +306,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 3,
+  "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [

@@ -330,7 +330,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 4,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -461,7 +461,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 10,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -578,7 +578,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 11,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -644,7 +644,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 13,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -761,7 +761,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 1,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -904,7 +904,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 2,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -948,7 +948,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 3,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -988,7 +988,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 4,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -1025,7 +1025,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 5,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -1055,7 +1055,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 10,
+  "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [

@@ -1079,7 +1079,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 20,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {
@@ -1092,7 +1092,9 @@
    "context_length: 1024\n",
    "num_devices: 8\n",
    "num_epochs: 5\n",
-   "checkpoint_every_n_epochs: 5\n",
+   "checkpoint_and_evaluation_frequency: \n",
+   "  unit: epochs\n",
+   "  frequency: 5\n",
    "train_batch_size_per_device: 4\n",
    "eval_batch_size_per_device: 4\n",
    "lr_scheduler_type: constant\n",
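(This mirrors the v0.5.8 schema change visible in the diff: the flat checkpoint_every_n_epochs key is replaced by a checkpoint_and_evaluation_frequency block that takes a unit and a frequency, here still checkpointing every 5 epochs.)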
@@ -1120,7 +1122,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 21,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -1206,7 +1208,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 7,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -1273,7 +1275,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 11,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {

@@ -1371,7 +1373,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 12,
+  "execution_count": null,
   "metadata": {},
   "outputs": [
    {
@@ -1400,6 +1402,25 @@
    "This plot illustrates that as we relax the cost constraints (i.e., increase the percentage of GPT-4 calls), the performance improves. While the performance of a random router improves linearly with cost, our router achieves significantly better results at each cost level."
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "## Cleanup"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# Cleanup\n",
+   "!python src/clear_cell_nums.py\n",
+   "!find . | grep -E \".ipynb_checkpoints\" | xargs rm -rf\n",
+   "!find . | grep -E \"(__pycache__|\\.pyc|\\.pyo)\" | xargs rm -rf"
+  ]
+ },
  {
   "cell_type": "markdown",
   "metadata": {},

Review comment: what is this?

Reply: I updated the router template to use the new 0.5.8 image. I noticed that the cell execution numbers are all messed up in the notebook, so I copied over some cleanup code from the E2E LLM Workflows template to clean up cell numbers and cached checkpoints.
@@ -1425,7 +1446,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,
@@ -0,0 +1,23 @@
+from pathlib import Path
+
+import nbformat
+
+
+def clear_execution_numbers(nb_path):
+    with open(nb_path, "r", encoding="utf-8") as f:
+        nb = nbformat.read(f, as_version=4)
+    for cell in nb["cells"]:
+        if cell["cell_type"] == "code":
+            cell["execution_count"] = None
+            for output in cell["outputs"]:
+                if "execution_count" in output:
+                    output["execution_count"] = None
+    with open(nb_path, "w", encoding="utf-8") as f:
+        nbformat.write(nb, f)
+
+
+if __name__ == "__main__":
+    ROOT_DIR = Path(__file__).parent.parent
+    notebook_fps = list(ROOT_DIR.glob("**/*.ipynb"))
+    for fp in notebook_fps:
+        clear_execution_numbers(fp)
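As written, the script only exposes the glob-everything entrypoint (run as `python src/clear_cell_nums.py` from the repo root, as the notebook's Cleanup cell does). A minimal sketch of calling it on a single notebook instead; the import path and file name here are hypothetical examples, not part of this PR:

# Hypothetical single-file usage; assumes src/ is on sys.path so the module
# at src/clear_cell_nums.py is importable.
from clear_cell_nums import clear_execution_numbers

clear_execution_numbers("e2e-llm-workflows.ipynb")  # example path, not from this PR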
Review comment: @kouroshHakha just wanna highlight that this is a direct edit to the existing config. I think having a separate config with liger enabled is also doable, but given that we've tested liger extensively for correctness, I'm fine with having this be in the defaults to squeeze out more performance. A lot of optionality is also confusing to the user.