diff --git a/README.md b/README.md
index cdc991a506..aba8a5ad83 100644
--- a/README.md
+++ b/README.md
@@ -126,12 +126,12 @@ tune convert_checkpoint --checkpoint-path
 
 On a single GPU
 ```
-tune finetune_llm --config alpaca_llama2_finetune
+tune --nnodes 1 --nproc_per_node 1 full_finetune --config alpaca_llama2_full_finetune
 ```
 
 On multiple GPUs using FSDP
 ```
-tune --nnodes 1 --nproc_per_node 4 finetune_llm --config alpaca_llama2_finetune --fsdp True
+tune --nnodes 1 --nproc_per_node 4 full_finetune --config alpaca_llama2_full_finetune
 ```
 
 &nbsp;
@@ -140,9 +140,9 @@ tune --nnodes 1 --nproc_per_node 4 finetune_llm --config alpaca_llama2_finetune
 
 To copy a recipe to customize it yourself and then run
 ```
-tune recipe cp finetune_llm my_recipe/finetune_llm.py
-tune config cp alpaca_llama2_finetune my_recipe/alpaca_llama2_finetune.yaml
-tune my_recipe/finetune_llm.py --config my_recipe/alpaca_llama2_finetune.yaml
+tune recipe cp full_finetune my_recipe/full_finetune.py
+tune config cp alpaca_llama2_full_finetune my_recipe/alpaca_llama2_full_finetune.yaml
+tune my_recipe/full_finetune.py --config my_recipe/alpaca_llama2_full_finetune.yaml
 ```
 
 &nbsp;
@@ -154,15 +154,11 @@ recipes. Aside from torchtune recipe utilities, it integrates with ``torch.distributed.run``
 to support distributed job launching by default. ``tune`` offers everything that ``torchrun``
 does with the following additional functionalities:
 
-1. ``tune <recipe>`` with no optional ``torchrun`` options launches a single python process
+1. ``tune <recipe>`` will launch a torchrun job
 
 2. ``<recipe>`` and recipe arg ``<config>`` can both be passed in as names instead of paths if they're included in torchtune
 
-3. ``tune <path/to/recipe.py>`` can be used to launch local recipes
-
-4. ``tune <torchrun args>`` will launch a torchrun job
-
-5. ``tune recipe`` and ``tune config`` commands provide utilities for listing and copying packaged recipes and configs
+3. ``tune recipe`` and ``tune config`` commands provide utilities for listing and copying packaged recipes and configs
 
 &nbsp;
 
diff --git a/docs/source/recipes/finetune_llm.rst b/docs/source/recipes/finetune_llm.rst
index 5a7e4e3d91..51fedd1eba 100644
--- a/docs/source/recipes/finetune_llm.rst
+++ b/docs/source/recipes/finetune_llm.rst
@@ -16,25 +16,3 @@ This recipe supports:
 
 * :ref:`Distributed Training with FSDP`
 * :ref:`Activation Checkpointing`
-
-To run the recipe directly, launch with
-
-.. code-block:: bash
-
-    tune finetune_llm --config <config>
-
-Recipe
-------
-
-Copy the recipe directly into your own script or notebook to modify and edit for yourself.
-
-.. literalinclude:: ../../../recipes/finetune_llm.py
-
-Configs
--------
-
-.. tabs::
-
-    .. tab:: alpaca_llama2_finetune
-
-        .. literalinclude:: ../../../recipes/configs/alpaca_llama2_finetune.yaml
diff --git a/recipes/__init__.py b/recipes/__init__.py
index 035c57172b..c7c543f119 100644
--- a/recipes/__init__.py
+++ b/recipes/__init__.py
@@ -5,8 +5,11 @@
 # LICENSE file in the root directory of this source tree.
-_RECIPE_LIST = ["finetune_llm", "alpaca_generate"] -_CONFIG_LISTS = {"finetune_llm": ["alpaca_llama2_finetune"], "alpaca_generate": []} +_RECIPE_LIST = ["full_finetune", "alpaca_generate"] +_CONFIG_LISTS = { + "full_finetune": ["alpaca_llama2_full_finetune"], + "alpaca_generate": [], +} def list_recipes(): diff --git a/recipes/configs/alpaca_llama2_finetune.yaml b/recipes/configs/alpaca_llama2_finetune.yaml deleted file mode 100644 index db08e89660..0000000000 --- a/recipes/configs/alpaca_llama2_finetune.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Runs the finetune_llm.py recipe using FullFinetuneParams -# -# To launch, run the following command from root: -# tune finetune_llm --config alpaca_llama2_finetune --override model_checkpoint= ... - -# Dataset and Dataloader -dataset: alpaca -seed: null -shuffle: True - -# Model Arguments -model: llama2_7b -model_checkpoint: /tmp/llama2-7b -tokenizer: llama2_tokenizer -tokenizer_checkpoint: /tmp/tokenizer.model - -# Fine-tuning arguments -batch_size: 2 -lr: 2e-5 -epochs: 3 -optimizer: SGD -loss: CrossEntropyLoss -output_dir: /tmp/alpaca-llama2-finetune -device: cuda -dtype: fp32 -enable_activation_checkpointing: True -enable_fsdp: True -cpu_offload: False -resume_from_checkpoint: False - -# Metrics arguments -metric_logger_type: disk diff --git a/recipes/finetune_llm.py b/recipes/finetune_llm.py deleted file mode 100644 index 8e54748919..0000000000 --- a/recipes/finetune_llm.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import argparse -import os -from functools import partial - -import torch -from torch.cuda.amp import GradScaler -from torch.utils.data import DataLoader, DistributedSampler - -from torchtune import datasets, losses, models, modules, optim, utils -from torchtune.utils.checkpoint import load_checkpoint, save_checkpoint -from torchtune.utils.generation import generate_from_prompt -from tqdm import tqdm - -from recipes.params import FullFinetuneParams - - -def recipe( - params: FullFinetuneParams, -) -> None: - """Training loop for fine-tuning an LLM on a provided dataset. Supports evals, - checkpointing, and distributed training. - - Args: - params (FullFinetuneParams): dataclass containing all args for recipe. See ``FullFinetuneParams`` for - more details. - - Raises: - ValueError: If ``cpu_offload`` is ``True`` but ``device`` is not ``cuda`` and <= 1 GPUs. - """ - # ---- Initialize components ---- # - distributed = utils.init_distributed() - world_size, rank = utils.get_world_size_and_rank() - - logger = utils.get_logger("DEBUG") - metric_logger = utils.get_metric_logger( - metric_logger_type=params.metric_logger_type, - project=params.project, - log_dir=params.output_dir, - ) - - device = utils.get_device(params.device) - dtype = utils.get_dtype(params.dtype) - seed = utils.set_seed(params.seed) - - # ---- Setup model and load checkpoint ---- # - tokenizer = models.get_tokenizer(params.tokenizer, path=params.tokenizer_checkpoint) - logger.info(msg=f"Loaded tokenizer from {params.tokenizer_checkpoint}") - - # TODO: initialize models for distributed on meta or cpu device to avoid OOMs - model = models.get_model(params.model, device=device) - - if params.cpu_offload and not distributed: - raise ValueError( - "CPU offload is only supported with FSDP in a distributed setting." - "Please launch in a distributed setting. 
If you do not wish to use > 1 GPU," - "use ``tune --nnodes 1 --nproc_per_node 1 ...``. FSDP will not shard" - "any parameters." - ) - - if distributed: # Use FSDP model for distributed training - model = utils.wrap_fsdp( - model=model, - device=device, - dtype=dtype, - strategy="FULL_SHARD", - auto_wrap_policy={modules.TransformerDecoderLayer}, - cpu_offload=params.cpu_offload, - ) - if params.enable_activation_checkpointing: - utils.set_activation_checkpointing( - model, auto_wrap_policy={modules.TransformerDecoderLayer} - ) - - # ---- Setup optimization functions ---- # - opt = optim.get_optimizer(params.optimizer, model, params.lr) - # Load model and possibly optimizer states - if params.resume_from_checkpoint: - ckpt_dict = load_checkpoint(params.model_checkpoint, model, opt) - model.load_state_dict(ckpt_dict["model"]) - # Note: optimizer entry in dictionary is pre-transformed if using FSDP - opt.load_state_dict(ckpt_dict["optimizer"]) - if rank == 0: - logger.info( - msg=f"Loaded checkpoint from previous finetune from {params.model_checkpoint}" - ) - else: - ckpt_dict = load_checkpoint(params.model_checkpoint, model) - model.load_state_dict(ckpt_dict["model"]) - if rank == 0: - logger.info(msg=f"Loaded pretrained model from {params.model_checkpoint}") - - # TODO add lr schedule option - loss_fn = losses.get_loss(params.loss) - - autocast = utils.get_autocast(dtype, device) - if dtype == torch.float16: - grad_scaler = utils.get_gradient_scaler(distributed) - else: - grad_scaler = GradScaler(enabled=False) - - # ---- Load dataset, set up sampler, and dataloader ---- # - ds = datasets.get_dataset( - params.dataset, - split="train", - tokenizer=tokenizer, - train_on_input=params.train_on_input, - ) - sampler = DistributedSampler( - ds, - num_replicas=world_size, - rank=rank, - shuffle=params.shuffle, - seed=0, - ) - dataloader = DataLoader( - dataset=ds, - batch_size=params.batch_size, - sampler=sampler, - collate_fn=partial( - utils.padded_collate, - padding_idx=tokenizer.pad_id, - ignore_idx=loss_fn.ignore_index, # TODO support loss without ignore_index - ), - ) - logger.info(msg=f"Loaded dataset {params.dataset}") - - # ---- Train loop ---- # - for epoch in range(params.epochs): - sampler.set_epoch(epoch) # distributed sampler requires set_epoch - for idx, batch in enumerate(pbar := tqdm(dataloader, disable=not (rank == 0))): - if ( - params.max_steps_per_epoch is not None - and idx == params.max_steps_per_epoch - ): - break - opt.zero_grad() - - input_ids, labels = batch - input_ids = input_ids.to(device) - labels = labels.to(device) - - with autocast: - logits = model(input_ids) - # Shift so that tokens < n predict n - logits = logits[..., :-1, :].contiguous() - labels = labels[..., 1:].contiguous() - logits = logits.transpose(1, 2) - # Compute loss - loss = loss_fn(logits, labels) - - pbar.set_description(f"{epoch+1}|{idx+1}|Loss: {loss.item()}") - - # Log metrics at each step - # If no metric logger is specified, this is a no-op - if rank == 0: - metric_logger.log_dict( - { - "loss": loss.item(), - "lr": opt.param_groups[0]["lr"], - "gpu_resources": torch.cuda.memory_allocated(), - }, - step=epoch * len(dataloader) - + idx, # Each step is unique, not limited to each epoch - ) - - grad_scaler.scale(loss).backward() - grad_scaler.step(opt) - grad_scaler.update() - - # --- TODO TEMPORARY EVAL Code ---- # - if params.run_generation and idx % params.run_generation == 0: - # Log a sample generation for the instruction. 
- # Just using a hardcoded prompt for now - prompt = ( - "Below is an instruction that describes a task, paired with an input that provides further context. " - "Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a classification task " - "by clustering the given list of items.\n\n### Input:\nApples, oranges, bananas, strawberries, pineapples\n\n" - "### Response:" - ) - generation_str, decoded_tokens = generate_from_prompt( - prompt=prompt, tokenizer=tokenizer, decoder=model - ) - if rank == 0: - logger.info(f"Generation tokens: {decoded_tokens}") - logger.info(f"Generation: {generation_str}") - # --- TODO TEMPORARY EVAL Code Ends ---- # - - # ---- Save checkpoint at end of each epoch (to be changed later) ---- # - os.makedirs(params.output_dir, exist_ok=True) - output_loc = f"{params.output_dir}/model_{epoch}.ckpt" - ckpt_dict = { - "model": model, - "optimizer": opt, - } - if epoch == params.epochs - 1: - # Don't save optimizer state when producing final checkpoint to reduce checkpoint file size. - ckpt_dict.pop("optimizer") - if rank == 0: - logger.info(msg=f"Saving model checkpoint to {output_loc}") - save_checkpoint(ckpt_dict, output_loc) - if rank == 0: - logger.info( - msg=f"Model checkpoint of size {os.path.getsize(output_loc) >> 20} MB saved to {output_loc}" - ) - - metric_logger.close() - - -if __name__ == "__main__": - parser = utils.TuneArgumentParser( - description=FullFinetuneParams.__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - # Get user-specified args from config and CLI and create params for recipe - args, _ = parser.parse_known_args() - args = vars(args) - params = FullFinetuneParams(**args) - - logger = utils.get_logger("DEBUG") - logger.info(msg=f"Running finetune_llm.py with parameters {params}") - - recipe(params) diff --git a/recipes/tests/test_finetune_llm.py b/recipes/tests/test_full_finetune.py similarity index 68% rename from recipes/tests/test_finetune_llm.py rename to recipes/tests/test_full_finetune.py index 2b6dde071b..5b4c2848b2 100644 --- a/recipes/tests/test_finetune_llm.py +++ b/recipes/tests/test_full_finetune.py @@ -11,7 +11,6 @@ import pytest -import recipes.finetune_llm as finetune_llm from recipes.full_finetune import FullFinetuneRecipe from recipes.params import FullFinetuneParams @@ -40,124 +39,6 @@ def small_test_ckpt(max_batch_size: Optional[int] = None) -> TransformerDecoder: logger = logging.getLogger(__name__) -class TestFinetuneLLMRecipe: - def _fetch_loss_values(self, output) -> Dict[str, float]: - lines = output.splitlines() - loss_values = {} - for line in lines: - if "Loss:" in line: - splits = line.split("Loss:") - loss_value = float(splits[1].split(":")[0]) - loss_values[splits[0]] = loss_value - return loss_values - - def _fetch_expected_loss_values(self, ckpt) -> Dict[str, float]: - small_test_ckpt_loss_values = { - "1|1|": 10.5074, - "1|2|": 10.5563, - "2|1|": 10.5152, - "2|2|": 10.4851, - } - llama2_7b_ckpt_loss_values = { - "1|1|": 1.1333, - "1|2|": 1.1199, - "2|1|": 1.2614, - "2|2|": 0.9486, - } - if ckpt == "small_test_ckpt": - return small_test_ckpt_loss_values - if ckpt == "llama2_7b": - return llama2_7b_ckpt_loss_values - raise ValueError(f"Unknown ckpt {ckpt}") - - def _fetch_ckpt_model_path(self, ckpt) -> str: - if ckpt == "small_test_ckpt": - return "/tmp/test-artifacts/small-ckpt-01242024" - if ckpt == "llama2_7b": - return "/tmp/test-artifacts/llama2-7b-01242024" - raise ValueError(f"Unknown ckpt {ckpt}") - - def test_finetune_llm_loss(self, capsys, 
pytestconfig): - large_scale = pytestconfig.getoption("--large-scale") - ckpt = "llama2_7b" if large_scale else "small_test_ckpt" - expected_loss_values = self._fetch_expected_loss_values(ckpt) - - kwargs_values = { - "dataset": "alpaca", - "train_on_input": False, - "seed": 9, - "shuffle": True, - "model": ckpt, - "model_checkpoint": self._fetch_ckpt_model_path(ckpt), - "tokenizer": "llama2_tokenizer", - "tokenizer_checkpoint": "/tmp/test-artifacts/tokenizer.model", - "batch_size": 8, - "lr": 2e-5, - "epochs": 2, - "max_steps_per_epoch": 2, - "optimizer": "AdamW", - "loss": "CrossEntropyLoss", - "output_dir": "/tmp", - "device": "cpu", - "dtype": "fp32", - "enable_activation_checkpointing": False, - "enable_fsdp": False, - "run_generation": None, - "metric_logger_type": "disk", - "project": None, - "resume_from_checkpoint": False, - "cpu_offload": False, - } - - finetune_llm.recipe(FullFinetuneParams(**kwargs_values)) - loss_values = self._fetch_loss_values(capsys.readouterr().err) - logger.info("Expected loss values : ", expected_loss_values) - logger.info("Loss values from Finetune : ", loss_values) - assert len(loss_values) == len(expected_loss_values) - for key, value in loss_values.items(): - assert key in expected_loss_values - expected_loss_value = expected_loss_values[key] - assert value == pytest.approx(expected_loss_value, abs=0.001) - - def test_finetune_errors(self, capsys, pytestconfig): - large_scale = pytestconfig.getoption("--large-scale") - ckpt = "llama2_7b" if large_scale else "small_test_ckpt" - expected_loss_values = self._fetch_expected_loss_values(ckpt) - - kwargs_values = { - "dataset": "alpaca", - "train_on_input": False, - "seed": 9, - "shuffle": True, - "model": ckpt, - "model_checkpoint": self._fetch_ckpt_model_path(ckpt), - "tokenizer": "llama2_tokenizer", - "tokenizer_checkpoint": "/tmp/test-artifacts/tokenizer.model", - "batch_size": 8, - "lr": 2e-5, - "epochs": 2, - "max_steps_per_epoch": 2, - "optimizer": "AdamW", - "loss": "CrossEntropyLoss", - "output_dir": "/tmp", - "device": "cpu", - "dtype": "fp32", - "enable_activation_checkpointing": False, - "enable_fsdp": False, - "run_generation": None, - "metric_logger_type": "disk", - "project": None, - "resume_from_checkpoint": False, - "cpu_offload": True, - } - - with pytest.raises( - ValueError, - match="Cannot offload model to CPU if device is not cuda or <= 1 GPUs.", - ): - finetune_llm.recipe(FullFinetuneParams(**kwargs_values)) - - class TestFullFinetuneRecipe: def _fetch_loss_values(self, output) -> Dict[str, float]: lines = output.splitlines() diff --git a/tests/scripts/test_tune.py b/tests/scripts/test_tune.py index 1bdaaf6e01..3ce8586a36 100644 --- a/tests/scripts/test_tune.py +++ b/tests/scripts/test_tune.py @@ -32,7 +32,7 @@ def test_recipe_list(self, capsys): def test_recipe_cp(self, tmp_path, capsys): # Valid recipe - recipe = "finetune_llm" + recipe = "full_finetune" path = tmp_path / "dummy.py" testargs = f"tune recipe cp {recipe} {path}".split() with patch.object(sys, "argv", testargs): @@ -69,7 +69,7 @@ def test_recipe_paths(self): assert os.path.exists(recipe_path), f"{recipe_path} must exist" def test_config_list(self, capsys): - recipe = "finetune_llm" + recipe = "full_finetune" testargs = f"tune config list --recipe {recipe}".split() with patch.object(sys, "argv", testargs): runpy.run_path(TUNE_PATH, run_name="__main__") @@ -82,7 +82,7 @@ def test_config_list(self, capsys): def test_config_cp(self, tmp_path, capsys): # Valid recipe - config = "alpaca_llama2_finetune" + config = 
"alpaca_llama2_full_finetune" path = tmp_path / "dummy.yaml" testargs = f"tune config cp {config} {path}".split() with patch.object(sys, "argv", testargs): @@ -123,10 +123,10 @@ def test_config_paths(self): assert os.path.exists(config_path), f"{config_path} must exist" def test_run(self, capsys): - recipe = "finetune_llm" + recipe = "full_finetune" # Make sure we're not running on GPU which can lead to issues on GH CI testargs = f"\ - tune {recipe} --config alpaca_llama2_finetune --override tokenizer=fake \ + tune {recipe} --config alpaca_llama2_full_finetune --override tokenizer=fake \ device=cpu enable_fsdp=False enable_activation_checkpointing=False \ ".split() with patch.object(sys, "argv", testargs):