From d7e3f7f20baf877756e65e3304a2cceb95e29b56 Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Fri, 8 Nov 2024 13:38:20 -0800 Subject: [PATCH 01/10] update router template after v0.5.8 Signed-off-by: SumanthRH --- templates/llm-router/README.ipynb | 59 +++++++++++++------ templates/llm-router/README.md | 14 ++++- .../llm-router/configs/ft_config_a10.yaml | 4 +- templates/llm-router/src/clear_cell_nums.py | 23 ++++++++ 4 files changed, 79 insertions(+), 21 deletions(-) create mode 100644 templates/llm-router/src/clear_cell_nums.py diff --git a/templates/llm-router/README.ipynb b/templates/llm-router/README.ipynb index 8ba4bcb12..72dc6bfe2 100644 --- a/templates/llm-router/README.ipynb +++ b/templates/llm-router/README.ipynb @@ -111,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -306,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -330,7 +330,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -461,7 +461,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -578,7 +578,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -644,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -761,7 +761,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -904,7 +904,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -948,7 +948,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -988,7 +988,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, 
"metadata": {}, "outputs": [ { @@ -1025,7 +1025,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1055,7 +1055,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1079,7 +1079,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1092,7 +1092,9 @@ "context_length: 1024\n", "num_devices: 8\n", "num_epochs: 5\n", - "checkpoint_every_n_epochs: 5\n", + "checkpoint_and_evaluation_frequency: \n", + " unit: epochs\n", + " frequency: 5\n", "train_batch_size_per_device: 4\n", "eval_batch_size_per_device: 4\n", "lr_scheduler_type: constant\n", @@ -1120,7 +1122,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1206,7 +1208,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1273,7 +1275,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1371,7 +1373,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1400,6 +1402,25 @@ "This plot illustrates that as we relax the cost constraints (i.e., increase the percentage of GPT-4 calls), the performance improves. While the performance of a random router improves linearly with cost, our router achieves significantly better results at each cost level." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cleanup\n", + "!python src/clear_cell_nums.py\n", + "!find . | grep -E \".ipynb_checkpoints\" | xargs rm -rf\n", + "!find . 
| grep -E \"(__pycache__|\\.pyc|\\.pyo)\" | xargs rm -rf" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1425,7 +1446,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/templates/llm-router/README.md b/templates/llm-router/README.md index ce5a8b1ab..8c787cbd6 100644 --- a/templates/llm-router/README.md +++ b/templates/llm-router/README.md @@ -715,7 +715,9 @@ For this tutorial, we will perform full-parameter finetuning of Llama3-8B on the context_length: 1024 num_devices: 8 num_epochs: 5 - checkpoint_every_n_epochs: 5 + checkpoint_and_evaluation_frequency: + unit: epochs + frequency: 5 train_batch_size_per_device: 4 eval_batch_size_per_device: 4 lr_scheduler_type: constant @@ -912,5 +914,15 @@ display(Image(filename=image_path)) This plot illustrates that as we relax the cost constraints (i.e., increase the percentage of GPT-4 calls), the performance improves. While the performance of a random router improves linearly with cost, our router achieves significantly better results at each cost level. +## Cleanup + + +```python +# Cleanup +!python src/clear_cell_nums.py +!find . | grep -E ".ipynb_checkpoints" | xargs rm -rf +!find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf +``` + # Conclusion In this tutorial, we have successfully built and evaluated a finetuned-LLM router. We generated synthetic labeled data using the LLM-as-a-judge method to train the model, finetuned an LLM classifier using Anyscale's API, and conducted offline evaluation on a standard benchmark-- demonstrating that our model is effective in out-of-domain generalization. 
diff --git a/templates/llm-router/configs/ft_config_a10.yaml b/templates/llm-router/configs/ft_config_a10.yaml index 60cfcd94b..c7ead657f 100644 --- a/templates/llm-router/configs/ft_config_a10.yaml +++ b/templates/llm-router/configs/ft_config_a10.yaml @@ -4,7 +4,9 @@ valid_path: /mnt/user_storage/train_data_sample.jsonl context_length: 1024 num_devices: 8 num_epochs: 5 -checkpoint_every_n_epochs: 5 +checkpoint_and_evaluation_frequency: + unit: epochs + frequency: 5 train_batch_size_per_device: 4 eval_batch_size_per_device: 4 lr_scheduler_type: constant diff --git a/templates/llm-router/src/clear_cell_nums.py b/templates/llm-router/src/clear_cell_nums.py new file mode 100644 index 000000000..e7147d82e --- /dev/null +++ b/templates/llm-router/src/clear_cell_nums.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import nbformat + + +def clear_execution_numbers(nb_path): + with open(nb_path, "r", encoding="utf-8") as f: + nb = nbformat.read(f, as_version=4) + for cell in nb["cells"]: + if cell["cell_type"] == "code": + cell["execution_count"] = None + for output in cell["outputs"]: + if "execution_count" in output: + output["execution_count"] = None + with open(nb_path, "w", encoding="utf-8") as f: + nbformat.write(nb, f) + + +if __name__ == "__main__": + ROOT_DIR = Path(__file__).parent.parent + notebook_fps = list(ROOT_DIR.glob("**/*.ipynb")) + for fp in notebook_fps: + clear_execution_numbers(fp) From 9369d948b3e81c3449b7c2e12fa67ea72db77d90 Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Fri, 8 Nov 2024 14:39:09 -0800 Subject: [PATCH 02/10] minor updates Signed-off-by: SumanthRH --- templates/e2e-dspy-workflow/README.ipynb | 4 ++-- templates/e2e-dspy-workflow/README.md | 2 +- templates/e2e-dspy-workflow/configs/job.yaml | 2 +- templates/e2e-llm-workflows/deploy/jobs/ft.yaml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/templates/e2e-dspy-workflow/README.ipynb b/templates/e2e-dspy-workflow/README.ipynb index 264a6ce18..9ab562e0e 100644 --- 
a/templates/e2e-dspy-workflow/README.ipynb +++ b/templates/e2e-dspy-workflow/README.ipynb @@ -863,7 +863,7 @@ " 'name': 'dspy-llmforge-fine-tuning-job',\n", " 'entrypoint': 'llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml',\n", " 'working_dir': '.',\n", - " 'image_uri': 'localhost:5555/anyscale/llm-forge:0.5.7'\n", + " 'image_uri': 'localhost:5555/anyscale/llm-forge:0.5.8'\n", "}\n", "\n" ], @@ -872,7 +872,7 @@ " \u001b[32m'name'\u001b[0m: \u001b[32m'dspy-llmforge-fine-tuning-job'\u001b[0m,\n", " \u001b[32m'entrypoint'\u001b[0m: \u001b[32m'llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml'\u001b[0m,\n", " \u001b[32m'working_dir'\u001b[0m: \u001b[32m'.'\u001b[0m,\n", - " \u001b[32m'image_uri'\u001b[0m: \u001b[32m'localhost:5555/anyscale/llm-forge:0.5.7'\u001b[0m\n", + " \u001b[32m'image_uri'\u001b[0m: \u001b[32m'localhost:5555/anyscale/llm-forge:0.5.8'\u001b[0m\n", "\u001b[1m}\u001b[0m\n" ] }, diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md index f805853ea..50e7ac16d 100644 --- a/templates/e2e-dspy-workflow/README.md +++ b/templates/e2e-dspy-workflow/README.md @@ -519,7 +519,7 @@ rich.print(yaml.safe_load(open(job_config_path))) 'name': 'dspy-llmforge-fine-tuning-job', 'entrypoint': 'llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml', 'working_dir': '.', - 'image_uri': 'localhost:5555/anyscale/llm-forge:0.5.7' + 'image_uri': 'localhost:5555/anyscale/llm-forge:0.5.8' } diff --git a/templates/e2e-dspy-workflow/configs/job.yaml b/templates/e2e-dspy-workflow/configs/job.yaml index 041d0df8b..dcc295e2a 100644 --- a/templates/e2e-dspy-workflow/configs/job.yaml +++ b/templates/e2e-dspy-workflow/configs/job.yaml @@ -1,4 +1,4 @@ name: "dspy-llmforge-fine-tuning-job" entrypoint: "llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml" working_dir: "." 
-image_uri: "localhost:5555/anyscale/llm-forge:0.5.7" +image_uri: "localhost:5555/anyscale/llm-forge:0.5.8" diff --git a/templates/e2e-llm-workflows/deploy/jobs/ft.yaml b/templates/e2e-llm-workflows/deploy/jobs/ft.yaml index 6d6268c15..34e45d6cd 100644 --- a/templates/e2e-llm-workflows/deploy/jobs/ft.yaml +++ b/templates/e2e-llm-workflows/deploy/jobs/ft.yaml @@ -1,6 +1,6 @@ name: e2e-llm-workflows entrypoint: llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml -image_uri: localhost:5555/anyscale/llm-forge:0.5.7 +image_uri: localhost:5555/anyscale/llm-forge:0.5.8 requirements: [] max_retries: 1 excludes: ["assets"] From 0a2853fbb6b088fbfbafd9827ca2691a37be98c5 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Wed, 13 Nov 2024 12:47:26 -0800 Subject: [PATCH 03/10] add liger to default config --- templates/e2e-dspy-workflow/README.md | 4 ---- .../meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml | 9 +++++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md index 50e7ac16d..2f4207ebb 100644 --- a/templates/e2e-dspy-workflow/README.md +++ b/templates/e2e-dspy-workflow/README.md @@ -378,8 +378,6 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0]) ``` Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) - - Program output label: extra_charge_on_statement @@ -794,8 +792,6 @@ except ValueError as e: ``` Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) - - Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([]) Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) Fine-tuned model returned invalid output out and errored 
out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([]) diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml index 530f8a0a7..f44b0fc42 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml @@ -41,6 +41,15 @@ deepspeed: worker_resources: anyscale/accelerator_shape:4xA10G: 0.001 +liger_kernel: + enabled: True + kwargs: + rms_norm: True + rope: True + swiglu: True + cross_entropy: True + fused_linear_cross_entropy: False + # Lora configuration lora_config: r: 8 From e17633100d59d93ba051306e6e8e797f93ad1bf0 Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Wed, 13 Nov 2024 14:10:07 -0800 Subject: [PATCH 04/10] update liger configs --- .../Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml | 10 ++++++++++ .../Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml | 10 ++++++++++ .../meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml | 1 + 3 files changed, 21 insertions(+) diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml index 8170f8dd7..41d3871d4 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml @@ -50,6 +50,16 @@ logger: worker_resources: anyscale/accelerator_shape:4xA10G: 0.001 +# Liger kernel configuration +liger_kernel: + enabled: True + kwargs: + rms_norm: False + rope: True + swiglu: True + cross_entropy: True + fused_linear_cross_entropy: False + # Lora configuration lora_config: r: 8 diff --git 
a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml index 52e69bc8c..708d7239f 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml @@ -45,6 +45,16 @@ logger: worker_resources: anyscale/accelerator_shape:4xA10G: 0.001 +# Liger kernel configuration +liger_kernel: + enabled: True + kwargs: + rms_norm: False + rope: True + swiglu: True + cross_entropy: True + fused_linear_cross_entropy: False + # Lora configuration lora_config: r: 8 diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml index f44b0fc42..4abcec12c 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml @@ -41,6 +41,7 @@ deepspeed: worker_resources: anyscale/accelerator_shape:4xA10G: 0.001 +# Liger kernel configuration liger_kernel: enabled: True kwargs: From f3d9f80526823cf9569ec9456533f74d15add4cb Mon Sep 17 00:00:00 2001 From: Eric Tang Date: Wed, 13 Nov 2024 15:33:06 -0800 Subject: [PATCH 05/10] add lora to 8xa100 lora 70b config --- .../Meta-Llama-3-70B/full/16xA100-80G-4k.yaml | 2 +- .../Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/full/16xA100-80G-4k.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/full/16xA100-80G-4k.yaml index 8df4d97a3..9289d8be5 100644 --- 
a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/full/16xA100-80G-4k.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/full/16xA100-80G-4k.yaml @@ -32,7 +32,7 @@ num_checkpoints_to_keep: 1 # Deepspeed configuration, you can provide your own deepspeed setup deepspeed: - config_path: deepspeed_configs/zero_3_hpz.json + config_path: deepspeed_configs/zero_3_offload_optim+param.json # Accelerator type, we value of 0.001 is not important, as long as it is # beteween 0 and 1. This ensures that accelerator type is used per trainer diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml index f9a63cf97..6f8e355d2 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml @@ -40,6 +40,16 @@ deepspeed: worker_resources: accelerator_type:A100-80G: 0.001 +# Liger kernel configuration +liger_kernel: + enabled: True + kwargs: + rms_norm: True + rope: True + swiglu: True + cross_entropy: True + fused_linear_cross_entropy: False + # Lora configuration lora_config: r: 8 From 64548654ee7b74ad3f8f1309061a403fd500cd54 Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Wed, 13 Nov 2024 15:41:45 -0800 Subject: [PATCH 06/10] x Signed-off-by: SumanthRH --- templates/e2e-dspy-workflow/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md index 2f4207ebb..50e7ac16d 100644 --- a/templates/e2e-dspy-workflow/README.md +++ b/templates/e2e-dspy-workflow/README.md @@ -378,6 +378,8 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0]) ``` Program input: Example({'text': 'I still have not received an answer 
as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) + + Program output label: extra_charge_on_statement @@ -792,6 +794,8 @@ except ValueError as e: ``` Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) + + Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([]) Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) Fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([]) From b0281d757ce53daf747400d10119515c1bbcc15a Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Tue, 19 Nov 2024 23:22:56 -0800 Subject: [PATCH 07/10] address comments Signed-off-by: SumanthRH --- templates/e2e-dspy-workflow/README.md | 4 - .../Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml | 10 --- .../Meta-Llama-3-8B/lora/4xA10-512-liger.yaml | 77 +++++++++++++++++++ .../lora/4xA10-512-mlflow.yaml | 10 --- .../Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml | 10 --- .../Meta-Llama-3-8B/lora/4xA10-512.yaml | 10 --- 6 files changed, 77 insertions(+), 44 deletions(-) create mode 100644 templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md index 50e7ac16d..2f4207ebb 100644 --- a/templates/e2e-dspy-workflow/README.md +++ b/templates/e2e-dspy-workflow/README.md @@ -378,8 +378,6 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0]) ``` Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) - - Program output label: extra_charge_on_statement @@ -794,8 +792,6 @@ except ValueError as e: ``` Program input: Example({'text': 'I still have 
not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) - - Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([]) Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) Fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([]) diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml index 6f8e355d2..f9a63cf97 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml @@ -40,16 +40,6 @@ deepspeed: worker_resources: accelerator_type:A100-80G: 0.001 -# Liger kernel configuration -liger_kernel: - enabled: True - kwargs: - rms_norm: True - rope: True - swiglu: True - cross_entropy: True - fused_linear_cross_entropy: False - # Lora configuration lora_config: r: 8 diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml new file mode 100644 index 000000000..5eb957914 --- /dev/null +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml @@ -0,0 +1,77 @@ +# Change this to the model you want to fine-tune +model_id: meta-llama/Meta-Llama-3-8B-Instruct + +# Change this to the path to your training data +train_path: s3://air-example-data/gsm8k/train.jsonl + +# Change this to the path to your validation data. 
This is optional +valid_path: s3://air-example-data/gsm8k/test.jsonl + +# Change this to the context length you want to use. Examples with longer +# context length will be truncated. +context_length: 512 + +# Change this to total number of GPUs that you want to use +num_devices: 4 + +# Change this to the number of epochs that you want to train for +num_epochs: 3 + +# Change this to the batch size that you want to use +train_batch_size_per_device: 2 +eval_batch_size_per_device: 4 +gradient_accumulation_steps: 2 + + +# Change this to the learning rate that you want to use +learning_rate: 1e-4 + +# This will pad batches to the longest sequence. Use "max_length" when profiling to profile the worst case. +padding: "longest" + +# By default, we will keep the best checkpoint. You can change this to keep more checkpoints. +num_checkpoints_to_keep: 1 + +# Deepspeed configuration, you can provide your own deepspeed setup +deepspeed: + config_path: deepspeed_configs/zero_2.json + +logger: + provider: wandb + +# Accelerator type, the value of 0.001 is not important, as long as it is +# between 0 and 1. This ensures that accelerator type is used per trainer +# worker. +worker_resources: + anyscale/accelerator_shape:4xA10G: 0.001 + +# Liger kernel configuration +liger_kernel: + enabled: True + # You can further customize the individual liger kernel configurations here. By default, + # all the `kwargs` are `True` when liger is enabled. 
+ # kwargs: + # rms_norm: False + # rope: True + # swiglu: True + # cross_entropy: True + # fused_linear_cross_entropy: False + +# Lora configuration +lora_config: + r: 8 + lora_alpha: 16 + lora_dropout: 0.05 + target_modules: + - q_proj + - v_proj + - k_proj + - o_proj + - gate_proj + - up_proj + - down_proj + - embed_tokens + - lm_head + task_type: "CAUSAL_LM" + bias: "none" + modules_to_save: [] diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml index 41d3871d4..8170f8dd7 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml @@ -50,16 +50,6 @@ logger: worker_resources: anyscale/accelerator_shape:4xA10G: 0.001 -# Liger kernel configuration -liger_kernel: - enabled: True - kwargs: - rms_norm: False - rope: True - swiglu: True - cross_entropy: True - fused_linear_cross_entropy: False - # Lora configuration lora_config: r: 8 diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml index 708d7239f..52e69bc8c 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml @@ -45,16 +45,6 @@ logger: worker_resources: anyscale/accelerator_shape:4xA10G: 0.001 -# Liger kernel configuration -liger_kernel: - enabled: True - kwargs: - rms_norm: False - rope: True - swiglu: True - cross_entropy: True - fused_linear_cross_entropy: False - # Lora configuration lora_config: r: 8 diff --git 
a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml index 4abcec12c..530f8a0a7 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml @@ -41,16 +41,6 @@ deepspeed: worker_resources: anyscale/accelerator_shape:4xA10G: 0.001 -# Liger kernel configuration -liger_kernel: - enabled: True - kwargs: - rms_norm: True - rope: True - swiglu: True - cross_entropy: True - fused_linear_cross_entropy: False - # Lora configuration lora_config: r: 8 From 0a0c8dcc56441cedfbe23e9f52effb9826c65666 Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Wed, 20 Nov 2024 14:56:49 -0800 Subject: [PATCH 08/10] x Signed-off-by: SumanthRH --- templates/e2e-dspy-workflow/README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md index 50e7ac16d..2f4207ebb 100644 --- a/templates/e2e-dspy-workflow/README.md +++ b/templates/e2e-dspy-workflow/README.md @@ -378,8 +378,6 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0]) ``` Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) - - Program output label: extra_charge_on_statement @@ -794,8 +792,6 @@ except ValueError as e: ``` Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) - - Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([]) Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) Fine-tuned model returned 
invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([]) From 1a9490a994efcd88861a6e9483fc4618c80df715 Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Wed, 20 Nov 2024 15:00:13 -0800 Subject: [PATCH 09/10] x Signed-off-by: SumanthRH --- .../meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml index 5eb957914..a46f6ba7c 100644 --- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml +++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml @@ -51,11 +51,11 @@ liger_kernel: # You can further customize the individual liger kernel configurations here. By default, # all the `kwargs` are `True` when liger is enabled. 
# kwargs: - # rms_norm: False + # rms_norm: True # rope: True # swiglu: True # cross_entropy: True - # fused_linear_cross_entropy: False + # fused_linear_cross_entropy: True # Lora configuration lora_config: From 8d12ca7495a5a2a699c8f40d10c7301b3c6c2771 Mon Sep 17 00:00:00 2001 From: SumanthRH Date: Wed, 20 Nov 2024 15:08:19 -0800 Subject: [PATCH 10/10] x Signed-off-by: SumanthRH --- templates/e2e-dspy-workflow/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md index 2f4207ebb..50e7ac16d 100644 --- a/templates/e2e-dspy-workflow/README.md +++ b/templates/e2e-dspy-workflow/README.md @@ -378,6 +378,8 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0]) ``` Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) + + Program output label: extra_charge_on_statement @@ -792,6 +794,8 @@ except ValueError as e: ``` Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) + + Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([]) Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'}) Fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])