From d7e3f7f20baf877756e65e3304a2cceb95e29b56 Mon Sep 17 00:00:00 2001
From: SumanthRH <sumanthrh@anyscale.com>
Date: Fri, 8 Nov 2024 13:38:20 -0800
Subject: [PATCH 01/10] update router template after v0.5.8

Signed-off-by: SumanthRH <sumanthrh@anyscale.com>
---
 templates/llm-router/README.ipynb             | 59 +++++++++++++------
 templates/llm-router/README.md                | 14 ++++-
 .../llm-router/configs/ft_config_a10.yaml     |  4 +-
 templates/llm-router/src/clear_cell_nums.py   | 23 ++++++++
 4 files changed, 79 insertions(+), 21 deletions(-)
 create mode 100644 templates/llm-router/src/clear_cell_nums.py

diff --git a/templates/llm-router/README.ipynb b/templates/llm-router/README.ipynb
index 8ba4bcb12..72dc6bfe2 100644
--- a/templates/llm-router/README.ipynb
+++ b/templates/llm-router/README.ipynb
@@ -111,7 +111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -306,7 +306,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -330,7 +330,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -461,7 +461,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -578,7 +578,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -644,7 +644,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -761,7 +761,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -904,7 +904,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -948,7 +948,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -988,7 +988,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1025,7 +1025,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1055,7 +1055,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1079,7 +1079,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1092,7 +1092,9 @@
       "context_length: 1024\n",
       "num_devices: 8\n",
       "num_epochs: 5\n",
-      "checkpoint_every_n_epochs: 5\n",
+      "checkpoint_and_evaluation_frequency: \n",
+      "  unit: epochs\n",
+      "  frequency: 5\n",
       "train_batch_size_per_device: 4\n",
       "eval_batch_size_per_device: 4\n",
       "lr_scheduler_type: constant\n",
@@ -1120,7 +1122,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1206,7 +1208,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1273,7 +1275,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1371,7 +1373,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1400,6 +1402,25 @@
     "This plot illustrates that as we relax the cost constraints (i.e., increase the percentage of GPT-4 calls), the performance improves. While the performance of a random router improves linearly with cost, our router achieves significantly better results at each cost level."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Cleanup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cleanup\n",
+    "!python src/clear_cell_nums.py\n",
+    "!find . | grep -E \".ipynb_checkpoints\" | xargs rm -rf\n",
+    "!find . | grep -E \"(__pycache__|\\.pyc|\\.pyo)\" | xargs rm -rf"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1425,7 +1446,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,
diff --git a/templates/llm-router/README.md b/templates/llm-router/README.md
index ce5a8b1ab..8c787cbd6 100644
--- a/templates/llm-router/README.md
+++ b/templates/llm-router/README.md
@@ -715,7 +715,9 @@ For this tutorial, we will perform full-parameter finetuning of Llama3-8B on the
     context_length: 1024
     num_devices: 8
     num_epochs: 5
-    checkpoint_every_n_epochs: 5
+    checkpoint_and_evaluation_frequency: 
+      unit: epochs
+      frequency: 5
     train_batch_size_per_device: 4
     eval_batch_size_per_device: 4
     lr_scheduler_type: constant
@@ -912,5 +914,15 @@ display(Image(filename=image_path))
 
 This plot illustrates that as we relax the cost constraints (i.e., increase the percentage of GPT-4 calls), the performance improves. While the performance of a random router improves linearly with cost, our router achieves significantly better results at each cost level.
 
+## Cleanup
+
+
+```python
+# Cleanup
+!python src/clear_cell_nums.py
+!find . | grep -E ".ipynb_checkpoints" | xargs rm -rf
+!find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
+```
+
 # Conclusion
 In this tutorial, we have successfully built and evaluated a finetuned-LLM router. We generated synthetic labeled data using the LLM-as-a-judge method to train the model, finetuned an LLM classifier using Anyscale's API, and conducted offline evaluation on a standard benchmark-- demonstrating that our model is effective in out-of-domain generalization.
diff --git a/templates/llm-router/configs/ft_config_a10.yaml b/templates/llm-router/configs/ft_config_a10.yaml
index 60cfcd94b..c7ead657f 100644
--- a/templates/llm-router/configs/ft_config_a10.yaml
+++ b/templates/llm-router/configs/ft_config_a10.yaml
@@ -4,7 +4,9 @@ valid_path: /mnt/user_storage/train_data_sample.jsonl
 context_length: 1024
 num_devices: 8
 num_epochs: 5
-checkpoint_every_n_epochs: 5
+checkpoint_and_evaluation_frequency:
+  unit: epochs
+  frequency: 5
 train_batch_size_per_device: 4
 eval_batch_size_per_device: 4
 lr_scheduler_type: constant
diff --git a/templates/llm-router/src/clear_cell_nums.py b/templates/llm-router/src/clear_cell_nums.py
new file mode 100644
index 000000000..e7147d82e
--- /dev/null
+++ b/templates/llm-router/src/clear_cell_nums.py
@@ -0,0 +1,23 @@
+from pathlib import Path
+
+import nbformat
+
+
+def clear_execution_numbers(nb_path):  # reset execution counts in the notebook at nb_path and rewrite it in place
+    with open(nb_path, "r", encoding="utf-8") as f:
+        nb = nbformat.read(f, as_version=4)  # load the notebook, requesting format version 4
+    for cell in nb["cells"]:
+        if cell["cell_type"] == "code":  # other cell types are left untouched
+            cell["execution_count"] = None
+            for output in cell["outputs"]:
+                if "execution_count" in output:  # only outputs that already carry the key are reset
+                    output["execution_count"] = None
+    with open(nb_path, "w", encoding="utf-8") as f:
+        nbformat.write(nb, f)  # overwrite the same file that was read above
+
+
+if __name__ == "__main__":  # script entry point: clear execution counts in every notebook under ROOT_DIR
+    ROOT_DIR = Path(__file__).parent.parent  # parent of the directory that contains this script
+    notebook_fps = list(ROOT_DIR.glob("**/*.ipynb"))  # recursive search for .ipynb files below ROOT_DIR
+    for fp in notebook_fps:
+        clear_execution_numbers(fp)  # each notebook is rewritten in place

From 9369d948b3e81c3449b7c2e12fa67ea72db77d90 Mon Sep 17 00:00:00 2001
From: SumanthRH <sumanthrh@anyscale.com>
Date: Fri, 8 Nov 2024 14:39:09 -0800
Subject: [PATCH 02/10] minor updates

Signed-off-by: SumanthRH <sumanthrh@anyscale.com>
---
 templates/e2e-dspy-workflow/README.ipynb        | 4 ++--
 templates/e2e-dspy-workflow/README.md           | 2 +-
 templates/e2e-dspy-workflow/configs/job.yaml    | 2 +-
 templates/e2e-llm-workflows/deploy/jobs/ft.yaml | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/templates/e2e-dspy-workflow/README.ipynb b/templates/e2e-dspy-workflow/README.ipynb
index 264a6ce18..9ab562e0e 100644
--- a/templates/e2e-dspy-workflow/README.ipynb
+++ b/templates/e2e-dspy-workflow/README.ipynb
@@ -863,7 +863,7 @@
        "    <span style=\"color: #008000; text-decoration-color: #008000\">'name'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'dspy-llmforge-fine-tuning-job'</span>,\n",
        "    <span style=\"color: #008000; text-decoration-color: #008000\">'entrypoint'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml'</span>,\n",
        "    <span style=\"color: #008000; text-decoration-color: #008000\">'working_dir'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'.'</span>,\n",
-       "    <span style=\"color: #008000; text-decoration-color: #008000\">'image_uri'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'localhost:5555/anyscale/llm-forge:0.5.7'</span>\n",
+       "    <span style=\"color: #008000; text-decoration-color: #008000\">'image_uri'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'localhost:5555/anyscale/llm-forge:0.5.8'</span>\n",
        "<span style=\"font-weight: bold\">}</span>\n",
        "</pre>\n"
       ],
@@ -872,7 +872,7 @@
        "    \u001b[32m'name'\u001b[0m: \u001b[32m'dspy-llmforge-fine-tuning-job'\u001b[0m,\n",
        "    \u001b[32m'entrypoint'\u001b[0m: \u001b[32m'llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml'\u001b[0m,\n",
        "    \u001b[32m'working_dir'\u001b[0m: \u001b[32m'.'\u001b[0m,\n",
-       "    \u001b[32m'image_uri'\u001b[0m: \u001b[32m'localhost:5555/anyscale/llm-forge:0.5.7'\u001b[0m\n",
+       "    \u001b[32m'image_uri'\u001b[0m: \u001b[32m'localhost:5555/anyscale/llm-forge:0.5.8'\u001b[0m\n",
        "\u001b[1m}\u001b[0m\n"
       ]
      },
diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md
index f805853ea..50e7ac16d 100644
--- a/templates/e2e-dspy-workflow/README.md
+++ b/templates/e2e-dspy-workflow/README.md
@@ -519,7 +519,7 @@ rich.print(yaml.safe_load(open(job_config_path)))
     <span style="color: #008000; text-decoration-color: #008000">'name'</span>: <span style="color: #008000; text-decoration-color: #008000">'dspy-llmforge-fine-tuning-job'</span>,
     <span style="color: #008000; text-decoration-color: #008000">'entrypoint'</span>: <span style="color: #008000; text-decoration-color: #008000">'llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml'</span>,
     <span style="color: #008000; text-decoration-color: #008000">'working_dir'</span>: <span style="color: #008000; text-decoration-color: #008000">'.'</span>,
-    <span style="color: #008000; text-decoration-color: #008000">'image_uri'</span>: <span style="color: #008000; text-decoration-color: #008000">'localhost:5555/anyscale/llm-forge:0.5.7'</span>
+    <span style="color: #008000; text-decoration-color: #008000">'image_uri'</span>: <span style="color: #008000; text-decoration-color: #008000">'localhost:5555/anyscale/llm-forge:0.5.8'</span>
 <span style="font-weight: bold">}</span>
 </pre>
 
diff --git a/templates/e2e-dspy-workflow/configs/job.yaml b/templates/e2e-dspy-workflow/configs/job.yaml
index 041d0df8b..dcc295e2a 100644
--- a/templates/e2e-dspy-workflow/configs/job.yaml
+++ b/templates/e2e-dspy-workflow/configs/job.yaml
@@ -1,4 +1,4 @@
 name: "dspy-llmforge-fine-tuning-job"
 entrypoint: "llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml"
 working_dir: "."
-image_uri: "localhost:5555/anyscale/llm-forge:0.5.7"
+image_uri: "localhost:5555/anyscale/llm-forge:0.5.8"
diff --git a/templates/e2e-llm-workflows/deploy/jobs/ft.yaml b/templates/e2e-llm-workflows/deploy/jobs/ft.yaml
index 6d6268c15..34e45d6cd 100644
--- a/templates/e2e-llm-workflows/deploy/jobs/ft.yaml
+++ b/templates/e2e-llm-workflows/deploy/jobs/ft.yaml
@@ -1,6 +1,6 @@
 name: e2e-llm-workflows
 entrypoint: llmforge anyscale finetune configs/training/lora/llama-3-8b.yaml
-image_uri: localhost:5555/anyscale/llm-forge:0.5.7
+image_uri: localhost:5555/anyscale/llm-forge:0.5.8
 requirements: []
 max_retries: 1
 excludes: ["assets"]

From 0a2853fbb6b088fbfbafd9827ca2691a37be98c5 Mon Sep 17 00:00:00 2001
From: Eric Tang <erictang000@gmail.com>
Date: Wed, 13 Nov 2024 12:47:26 -0800
Subject: [PATCH 03/10] add liger to default config

---
 templates/e2e-dspy-workflow/README.md                    | 4 ----
 .../meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml       | 9 +++++++++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md
index 50e7ac16d..2f4207ebb 100644
--- a/templates/e2e-dspy-workflow/README.md
+++ b/templates/e2e-dspy-workflow/README.md
@@ -378,8 +378,6 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0])
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
-
-
     Program output label: extra_charge_on_statement
 
 
@@ -794,8 +792,6 @@ except ValueError as e:
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
-
-
     Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
     Fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])
diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml
index 530f8a0a7..f44b0fc42 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml
@@ -41,6 +41,15 @@ deepspeed:
 worker_resources:
   anyscale/accelerator_shape:4xA10G: 0.001
 
+liger_kernel:
+  enabled: True
+  kwargs:
+    rms_norm: True
+    rope: True
+    swiglu: True
+    cross_entropy: True
+    fused_linear_cross_entropy: False
+
 # Lora configuration
 lora_config:
   r: 8

From e17633100d59d93ba051306e6e8e797f93ad1bf0 Mon Sep 17 00:00:00 2001
From: Eric Tang <erictang000@gmail.com>
Date: Wed, 13 Nov 2024 14:10:07 -0800
Subject: [PATCH 04/10] update liger configs

---
 .../Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml         | 10 ++++++++++
 .../Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml          | 10 ++++++++++
 .../meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml     |  1 +
 3 files changed, 21 insertions(+)

diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml
index 8170f8dd7..41d3871d4 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml
@@ -50,6 +50,16 @@ logger:
 worker_resources:
   anyscale/accelerator_shape:4xA10G: 0.001
 
+# Liger kernel configuration
+liger_kernel:
+  enabled: True
+  kwargs:
+    rms_norm: False
+    rope: True
+    swiglu: True
+    cross_entropy: True
+    fused_linear_cross_entropy: False
+
 # Lora configuration
 lora_config:
   r: 8
diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml
index 52e69bc8c..708d7239f 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml
@@ -45,6 +45,16 @@ logger:
 worker_resources:
   anyscale/accelerator_shape:4xA10G: 0.001
 
+# Liger kernel configuration
+liger_kernel:
+  enabled: True
+  kwargs:
+    rms_norm: False
+    rope: True
+    swiglu: True
+    cross_entropy: True
+    fused_linear_cross_entropy: False
+
 # Lora configuration
 lora_config:
   r: 8
diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml
index f44b0fc42..4abcec12c 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml
@@ -41,6 +41,7 @@ deepspeed:
 worker_resources:
   anyscale/accelerator_shape:4xA10G: 0.001
 
+# Liger kernel configuration
 liger_kernel:
   enabled: True
   kwargs:

From f3d9f80526823cf9569ec9456533f74d15add4cb Mon Sep 17 00:00:00 2001
From: Eric Tang <erictang000@gmail.com>
Date: Wed, 13 Nov 2024 15:33:06 -0800
Subject: [PATCH 05/10] add lora to 8xa100 lora 70b config

---
 .../Meta-Llama-3-70B/full/16xA100-80G-4k.yaml          |  2 +-
 .../Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml           | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/full/16xA100-80G-4k.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/full/16xA100-80G-4k.yaml
index 8df4d97a3..9289d8be5 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/full/16xA100-80G-4k.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/full/16xA100-80G-4k.yaml
@@ -32,7 +32,7 @@ num_checkpoints_to_keep: 1
 
 # Deepspeed configuration, you can provide your own deepspeed setup
 deepspeed:
-  config_path: deepspeed_configs/zero_3_hpz.json
+  config_path: deepspeed_configs/zero_3_offload_optim+param.json
 
 # Accelerator type, we value of 0.001 is not important, as long as it is
 # beteween 0 and 1. This ensures that accelerator type is used per trainer
diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml
index f9a63cf97..6f8e355d2 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml
@@ -40,6 +40,16 @@ deepspeed:
 worker_resources:
   accelerator_type:A100-80G: 0.001
 
+# Liger kernel configuration
+liger_kernel:
+  enabled: True
+  kwargs:
+    rms_norm: True
+    rope: True
+    swiglu: True
+    cross_entropy: True
+    fused_linear_cross_entropy: False
+
 # Lora configuration
 lora_config:
   r: 8

From 64548654ee7b74ad3f8f1309061a403fd500cd54 Mon Sep 17 00:00:00 2001
From: SumanthRH <sumanthrh@anyscale.com>
Date: Wed, 13 Nov 2024 15:41:45 -0800
Subject: [PATCH 06/10] x

Signed-off-by: SumanthRH <sumanthrh@anyscale.com>
---
 templates/e2e-dspy-workflow/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md
index 2f4207ebb..50e7ac16d 100644
--- a/templates/e2e-dspy-workflow/README.md
+++ b/templates/e2e-dspy-workflow/README.md
@@ -378,6 +378,8 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0])
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
+
+
     Program output label: extra_charge_on_statement
 
 
@@ -792,6 +794,8 @@ except ValueError as e:
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
+
+
     Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
     Fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])

From b0281d757ce53daf747400d10119515c1bbcc15a Mon Sep 17 00:00:00 2001
From: SumanthRH <sumanthrh@anyscale.com>
Date: Tue, 19 Nov 2024 23:22:56 -0800
Subject: [PATCH 07/10] address comments

Signed-off-by: SumanthRH <sumanthrh@anyscale.com>
---
 templates/e2e-dspy-workflow/README.md         |  4 -
 .../Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml  | 10 ---
 .../Meta-Llama-3-8B/lora/4xA10-512-liger.yaml | 77 +++++++++++++++++++
 .../lora/4xA10-512-mlflow.yaml                | 10 ---
 .../Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml | 10 ---
 .../Meta-Llama-3-8B/lora/4xA10-512.yaml       | 10 ---
 6 files changed, 77 insertions(+), 44 deletions(-)
 create mode 100644 templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml

diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md
index 50e7ac16d..2f4207ebb 100644
--- a/templates/e2e-dspy-workflow/README.md
+++ b/templates/e2e-dspy-workflow/README.md
@@ -378,8 +378,6 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0])
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
-
-
     Program output label: extra_charge_on_statement
 
 
@@ -794,8 +792,6 @@ except ValueError as e:
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
-
-
     Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
     Fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])
diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml
index 6f8e355d2..f9a63cf97 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-70B/lora/8xA100-80G-4k.yaml
@@ -40,16 +40,6 @@ deepspeed:
 worker_resources:
   accelerator_type:A100-80G: 0.001
 
-# Liger kernel configuration
-liger_kernel:
-  enabled: True
-  kwargs:
-    rms_norm: True
-    rope: True
-    swiglu: True
-    cross_entropy: True
-    fused_linear_cross_entropy: False
-
 # Lora configuration
 lora_config:
   r: 8
diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml
new file mode 100644
index 000000000..5eb957914
--- /dev/null
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml
@@ -0,0 +1,77 @@
+# Change this to the model you want to fine-tune
+model_id: meta-llama/Meta-Llama-3-8B-Instruct
+
+# Change this to the path to your training data
+train_path: s3://air-example-data/gsm8k/train.jsonl
+
+# Change this to the path to your validation data. This is optional
+valid_path: s3://air-example-data/gsm8k/test.jsonl
+
+# Change this to the context length you want to use. Examples with longer
+# context length will be truncated.
+context_length: 512
+
+# Change this to total number of GPUs that you want to use
+num_devices: 4
+
+# Change this to the number of epochs that you want to train for
+num_epochs: 3
+
+# Change this to the batch size that you want to use
+train_batch_size_per_device: 2
+eval_batch_size_per_device: 4
+gradient_accumulation_steps: 2
+
+
+# Change this to the learning rate that you want to use
+learning_rate: 1e-4
+
+# This will pad batches to the longest sequence. Use "max_length" when profiling to profile the worst case.
+padding: "longest"
+
+# By default, we will keep the best checkpoint. You can change this to keep more checkpoints.
+num_checkpoints_to_keep: 1
+
+# Deepspeed configuration, you can provide your own deepspeed setup
+deepspeed:
+  config_path: deepspeed_configs/zero_2.json
+
+logger:
+  provider: wandb
+
+# Accelerator type. The value of 0.001 is not important, as long as it is
+# between 0 and 1. This ensures that accelerator type is used per trainer
+# worker.
+worker_resources:
+  anyscale/accelerator_shape:4xA10G: 0.001
+
+# Liger kernel configuration
+liger_kernel:
+  enabled: True
+  # You can further customize the individual liger kernel configurations here. By default,
+  # all the `kwargs` are `True` when liger is enabled.
+  # kwargs:
+  #   rms_norm: False
+  #   rope: True
+  #   swiglu: True
+  #   cross_entropy: True
+  #   fused_linear_cross_entropy: False
+
+# Lora configuration
+lora_config:
+  r: 8
+  lora_alpha: 16
+  lora_dropout: 0.05
+  target_modules:
+    - q_proj
+    - v_proj
+    - k_proj
+    - o_proj
+    - gate_proj
+    - up_proj
+    - down_proj
+    - embed_tokens
+    - lm_head
+  task_type: "CAUSAL_LM"
+  bias: "none"
+  modules_to_save: []
diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml
index 41d3871d4..8170f8dd7 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-mlflow.yaml
@@ -50,16 +50,6 @@ logger:
 worker_resources:
   anyscale/accelerator_shape:4xA10G: 0.001
 
-# Liger kernel configuration
-liger_kernel:
-  enabled: True
-  kwargs:
-    rms_norm: False
-    rope: True
-    swiglu: True
-    cross_entropy: True
-    fused_linear_cross_entropy: False
-
 # Lora configuration
 lora_config:
   r: 8
diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml
index 708d7239f..52e69bc8c 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-wandb.yaml
@@ -45,16 +45,6 @@ logger:
 worker_resources:
   anyscale/accelerator_shape:4xA10G: 0.001
 
-# Liger kernel configuration
-liger_kernel:
-  enabled: True
-  kwargs:
-    rms_norm: False
-    rope: True
-    swiglu: True
-    cross_entropy: True
-    fused_linear_cross_entropy: False
-
 # Lora configuration
 lora_config:
   r: 8
diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml
index 4abcec12c..530f8a0a7 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512.yaml
@@ -41,16 +41,6 @@ deepspeed:
 worker_resources:
   anyscale/accelerator_shape:4xA10G: 0.001
 
-# Liger kernel configuration
-liger_kernel:
-  enabled: True
-  kwargs:
-    rms_norm: True
-    rope: True
-    swiglu: True
-    cross_entropy: True
-    fused_linear_cross_entropy: False
-
 # Lora configuration
 lora_config:
   r: 8

From 0a0c8dcc56441cedfbe23e9f52effb9826c65666 Mon Sep 17 00:00:00 2001
From: SumanthRH <sumanthrh@anyscale.com>
Date: Wed, 20 Nov 2024 14:56:49 -0800
Subject: [PATCH 08/10] x

Signed-off-by: SumanthRH <sumanthrh@anyscale.com>
---
 templates/e2e-dspy-workflow/README.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md
index 50e7ac16d..2f4207ebb 100644
--- a/templates/e2e-dspy-workflow/README.md
+++ b/templates/e2e-dspy-workflow/README.md
@@ -378,8 +378,6 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0])
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
-
-
     Program output label: extra_charge_on_statement
 
 
@@ -794,8 +792,6 @@ except ValueError as e:
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
-
-
     Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
     Fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])

From 1a9490a994efcd88861a6e9483fc4618c80df715 Mon Sep 17 00:00:00 2001
From: SumanthRH <sumanthrh@anyscale.com>
Date: Wed, 20 Nov 2024 15:00:13 -0800
Subject: [PATCH 09/10] x

Signed-off-by: SumanthRH <sumanthrh@anyscale.com>
---
 .../meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml
index 5eb957914..a46f6ba7c 100644
--- a/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml
+++ b/templates/fine-tune-llm_v2/training_configs/custom/meta-llama/Meta-Llama-3-8B/lora/4xA10-512-liger.yaml
@@ -51,11 +51,11 @@ liger_kernel:
   # You can further customize the individual liger kernel configurations here. By default,
   # all the `kwargs` are `True` when liger is enabled.
   # kwargs:
-  #   rms_norm: False
+  #   rms_norm: True
   #   rope: True
   #   swiglu: True
   #   cross_entropy: True
-  #   fused_linear_cross_entropy: False
+  #   fused_linear_cross_entropy: True
 
 # Lora configuration
 lora_config:

From 8d12ca7495a5a2a699c8f40d10c7301b3c6c2771 Mon Sep 17 00:00:00 2001
From: SumanthRH <sumanthrh@anyscale.com>
Date: Wed, 20 Nov 2024 15:08:19 -0800
Subject: [PATCH 10/10] x

Signed-off-by: SumanthRH <sumanthrh@anyscale.com>
---
 templates/e2e-dspy-workflow/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/templates/e2e-dspy-workflow/README.md b/templates/e2e-dspy-workflow/README.md
index 2f4207ebb..50e7ac16d 100644
--- a/templates/e2e-dspy-workflow/README.md
+++ b/templates/e2e-dspy-workflow/README.md
@@ -378,6 +378,8 @@ sanity_check_program(llama_70b, vanilla_program, ft_trainset[0])
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
+
+
     Program output label: extra_charge_on_statement
 
 
@@ -792,6 +794,8 @@ except ValueError as e:
 ```
 
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
+
+
     Non fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])
     Program input: Example({'text': 'I still have not received an answer as to why I was charged $1.00 in a transaction?'}) (input_keys={'text'})
     Fine-tuned model returned invalid output out and errored out with Expected dict_keys(['reasoning', 'label']) but got dict_keys([])