diff --git a/docs/source/examples/mlflow.rst b/docs/source/examples/mlflow.rst
index 89625689479..d0901c2f74b 100644
--- a/docs/source/examples/mlflow.rst
+++ b/docs/source/examples/mlflow.rst
@@ -4,7 +4,7 @@ MLflow
 
 `MLflow <https://mlflow.org/>`_ is an open-source platform, purpose-built to assist machine learning practitioners and teams in handling the complexities of the machine learning process. MLflow focuses on the full lifecycle for machine learning projects, ensuring that each phase is manageable, traceable, and reproducible.
 
-This document explains how to serve and deploy an MLflow model with BentoML. You can find all the source code in `examples/mlflow <https://github.com/bentoml/BentoML/tree/main/examples/mlflow>`_.
+This document explains how to serve and deploy an MLflow model with BentoML. You can find all the source code in the `BentoMLflow repository <https://github.com/bentoml/BentoMLflow>`_.
 
 Prerequisites
 -------------
diff --git a/docs/source/examples/overview.rst b/docs/source/examples/overview.rst
index c51101f92a0..71f360f757d 100644
--- a/docs/source/examples/overview.rst
+++ b/docs/source/examples/overview.rst
@@ -92,9 +92,8 @@ Custom models
 
 Serve custom models with BentoML:
 
-- `MLflow <https://github.com/bentoml/BentoML/tree/main/examples/mlflow>`_
-- `XGBoost <https://github.com/bentoml/BentoML/tree/main/examples/xgboost>`_
-- `scikit-learn <https://github.com/bentoml/BentoML/tree/main/examples/sk-learn>`_
+- `MLflow <https://github.com/bentoml/BentoMLflow>`_
+- `XGBoost <https://github.com/bentoml/BentoXGBoost>`_
 
 Others
 ------
diff --git a/docs/source/examples/xgboost.rst b/docs/source/examples/xgboost.rst
index 9b0ce137fa3..01ee1e78e58 100644
--- a/docs/source/examples/xgboost.rst
+++ b/docs/source/examples/xgboost.rst
@@ -4,7 +4,7 @@ XGBoost
 
 `XGBoost <https://xgboost.readthedocs.io/en/stable/>`_ is an optimized distributed gradient boosting library designed to be highly efficient, flexible and portable. It implements machine learning algorithms under the Gradient Boosting framework.
 
-This document explains how to serve and deploy an XGBoost model for predicting breast cancer with BentoML. You can find all the source code in `examples/xgboost <https://github.com/bentoml/BentoML/tree/main/examples/xgboost>`_.
+This document explains how to serve and deploy an XGBoost model for predicting breast cancer with BentoML. You can find all the source code in the `BentoXGBoost repository <https://github.com/bentoml/BentoXGBoost>`_.
 
 Prerequisites
 -------------
diff --git a/examples/.gitignore b/examples/.gitignore
deleted file mode 100644
index 1269488f7fb..00000000000
--- a/examples/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-data
diff --git a/examples/gradio/README.md b/examples/gradio/README.md
deleted file mode 100644
index 91e016bd830..00000000000
--- a/examples/gradio/README.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Quickstart
-
-This quickstart demonstrates how to add Gradio web UI to a BentoML service.
-
-## Prerequisites
-
-Python 3.9+ and `pip` installed. See the [Python downloads page](https://www.python.org/downloads/) to learn more.
-
-## Get started
-
-Perform the following steps to run this project and deploy it to BentoCloud.
-
-1. Install the required dependencies:
-
-   ```bash
-   pip install -r requirements.txt
-   ```
-
-2. Serve your model as an HTTP server. This starts a local server at [http://localhost:3000](http://localhost:3000/), making your model accessible as a web service.
-
-   ```bash
-   bentoml serve .
-   ```
-
-3. Visit http://localhost:3000/ui for gradio UI. BentoML APIs can be found at http://localhost:3000
diff --git a/examples/gradio/bentofile.yaml b/examples/gradio/bentofile.yaml
deleted file mode 100644
index 10c4a722daa..00000000000
--- a/examples/gradio/bentofile.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-service: "service:Summarization"
-labels:
-  project: quickstart
-  stage: dev
-include:
-  - "service.py"
-python:
-  packages:
-  - torch
-  - transformers
-  - gradio
-  - pydantic>=2.0
-  - fastapi
-  lock_packages: false
-docker:
-  python_version: "3.10"
diff --git a/examples/gradio/requirements.txt b/examples/gradio/requirements.txt
deleted file mode 100644
index 48ab8a59809..00000000000
--- a/examples/gradio/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-bentoml
-torch
-transformers
-gradio
-fastapi
diff --git a/examples/gradio/service.py b/examples/gradio/service.py
deleted file mode 100644
index 8411c8d11d1..00000000000
--- a/examples/gradio/service.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from __future__ import annotations  # I001
-
-import bentoml
-
-with bentoml.importing():
-    import gradio as gr
-    import torch
-    from transformers import pipeline
-
-EXAMPLE_INPUT = "Breaking News: In an astonishing turn of events, the small \
-town of Willow Creek has been taken by storm as local resident Jerry Thompson's cat, \
-Whiskers, performed what witnesses are calling a 'miraculous and gravity-defying leap.' \
-Eyewitnesses report that Whiskers, an otherwise unremarkable tabby cat, jumped \
-a record-breaking 20 feet into the air to catch a fly. The event, which took \
-place in Thompson's backyard, is now being investigated by scientists for potential \
-breaches in the laws of physics. Local authorities are considering a town festival \
-to celebrate what is being hailed as 'The Leap of the Century."
-
-
-def summarize_text(text: str) -> str:
-    svc_instance = bentoml.get_current_service()
-    return svc_instance.summarize([text])[0]
-
-
-io = gr.Interface(
-    fn=summarize_text,
-    inputs=[gr.Textbox(lines=5, label="Enter Text", value=EXAMPLE_INPUT)],
-    outputs=[gr.Textbox(label="Summary Text")],
-    title="Summarization",
-    description="Enter text to get summarized text.",
-)
-
-
-@bentoml.service(resources={"cpu": "4"})
-@bentoml.gradio.mount_gradio_app(io, path="/ui")
-class Summarization:
-    def __init__(self) -> None:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.pipeline = pipeline("summarization", device=device)
-
-    @bentoml.api(batchable=True)
-    def summarize(self, texts: list[str]) -> list[str]:
-        results = self.pipeline(texts)
-        return [item["summary_text"] for item in results]
diff --git a/examples/mlflow/bentofile.yaml b/examples/mlflow/bentofile.yaml
deleted file mode 100644
index 04bb300bb54..00000000000
--- a/examples/mlflow/bentofile.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-service: "service:IrisClassifier"
-labels:
-  owner: bentoml-team
-  stage: demo
-include:
-  - "*.py"
-python:
-  requirements_txt: './requirements.txt'
diff --git a/examples/mlflow/requirements.txt b/examples/mlflow/requirements.txt
deleted file mode 100644
index 2693586003c..00000000000
--- a/examples/mlflow/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-scikit-learn
-mlflow
-bentoml
diff --git a/examples/mlflow/save_model.py b/examples/mlflow/save_model.py
deleted file mode 100644
index 33b7116a9d7..00000000000
--- a/examples/mlflow/save_model.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from pathlib import Path
-
-import mlflow.sklearn
-from sklearn.datasets import load_iris
-from sklearn.neighbors import KNeighborsClassifier
-
-import bentoml
-
-iris = load_iris()
-X_train = iris.data[:, :4]
-Y_train = iris.target
-
-model_uri = Path("models", "IrisClf")
-model = KNeighborsClassifier()
-model.fit(X_train, Y_train)
-mlflow.sklearn.save_model(model, model_uri.resolve())
-# model_uri can be any URI that refers to an MLflow model
-# Use local path for demostration
-bentoml.mlflow.import_model("iris", model_uri)
diff --git a/examples/mlflow/service.py b/examples/mlflow/service.py
deleted file mode 100644
index 15917e5c8c6..00000000000
--- a/examples/mlflow/service.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import numpy as np
-
-import bentoml
-
-
-@bentoml.service(
-    resources={"cpu": "2"},
-    traffic={"timeout": 10},
-)
-class IrisClassifier:
-    bento_model = bentoml.models.get("iris:latest")
-
-    def __init__(self):
-        self.model = bentoml.mlflow.load_model(self.bento_model)
-
-    @bentoml.api
-    def predict(self, input_data: np.ndarray) -> np.ndarray:
-        rv = self.model.predict(input_data)
-        return np.asarray(rv)
diff --git a/examples/mlflow/test.py b/examples/mlflow/test.py
deleted file mode 100644
index 27d2d89ddbc..00000000000
--- a/examples/mlflow/test.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import numpy as np
-
-import bentoml
-
-# Load the model by specifying the model tag
-iris_model = bentoml.mlflow.load_model("iris:74px7hboeo25fjjt")
-
-input_data = np.array([[5.9, 3, 5.1, 1.8]])
-res = iris_model.predict(input_data)
-print(res)
diff --git a/examples/sk-learn/.bentoignore b/examples/sk-learn/.bentoignore
deleted file mode 100644
index f4e455d1cb8..00000000000
--- a/examples/sk-learn/.bentoignore
+++ /dev/null
@@ -1,4 +0,0 @@
-__pycache__/
-*.py[cod]
-*$py.class
-.ipynb_checkpoints
diff --git a/examples/sk-learn/benchmark/README.md b/examples/sk-learn/benchmark/README.md
deleted file mode 100644
index 4ca3ed0bd66..00000000000
--- a/examples/sk-learn/benchmark/README.md
+++ /dev/null
@@ -1,12 +0,0 @@
-Run the iris_classifier service in production mode:
-
-| Protocol | Command                                     |
-| -------- | ------------------------------------------- |
-| HTTP     | `bentoml serve-http iris_classifier:latest` |
-| gRPC     | `bentoml serve-grpc iris_classifier:latest` |
-
-Start locust testing client:
-
-```bash
-locust --class-picker -H http://localhost:3000
-```
diff --git a/examples/sk-learn/benchmark/locustfile.py b/examples/sk-learn/benchmark/locustfile.py
deleted file mode 100644
index 52a223196e1..00000000000
--- a/examples/sk-learn/benchmark/locustfile.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import time
-
-import grpc
-import numpy as np
-from locust import HttpUser
-from locust import User
-from locust import between
-from locust import task
-from sklearn import datasets
-
-from bentoml.grpc.v1 import service_pb2 as pb
-from bentoml.grpc.v1 import service_pb2_grpc as services
-
-test_data = datasets.load_iris().data
-num_of_rows = test_data.shape[0]
-max_batch_size = 10
-
-
-class IrisHttpUser(HttpUser):
-    """
-    Usage:
-        Run the iris_classifier service in production mode:
-
-            bentoml serve-http iris_classifier:latest
-
-        Start locust load testing client with:
-
-            locust --class-picker -H http://localhost:3000
-
-        Open browser at http://0.0.0.0:8089, adjust desired number of users and spawn
-        rate for the load test from the Web UI and start swarming.
-    """
-
-    @task
-    def classify(self):
-        start = np.random.choice(num_of_rows - max_batch_size)
-        end = start + np.random.choice(max_batch_size) + 1
-
-        input_data = test_data[start:end]
-        self.client.post("/classify", json=input_data.tolist())
-
-    wait_time = between(0.01, 2)
-
-
-class GrpcUser(User):
-    abstract = True
-
-    stub_class = None
-
-    def __init__(self, environment):
-        super().__init__(environment)
-        self.environment = environment
-
-    def on_start(self):
-        self.channel = grpc.insecure_channel(self.host)
-        self.stub = services.BentoServiceStub(self.channel)
-
-
-class IrisGrpcUser(GrpcUser):
-    """
-    Implementation is inspired by https://docs.locust.io/en/stable/testing-other-systems.html
-
-    Usage:
-        Run the iris_classifier service in production mode:
-
-            bentoml serve-grpc iris_classifier:latest
-
-        Start locust load testing client with:
-
-            locust --class-picker -H localhost:3000
-
-        Open browser at http://0.0.0.0:8089, adjust desired number of users and spawn
-        rate for the load test from the Web UI and start swarming.
-    """
-
-    @task
-    def classify(self):
-        start = np.random.choice(num_of_rows - max_batch_size)
-        end = start + np.random.choice(max_batch_size) + 1
-        input_data = test_data[start:end]
-        request_meta = {
-            "request_type": "grpc",
-            "name": "classify",
-            "start_time": time.time(),
-            "response_length": 0,
-            "exception": None,
-            "context": None,
-            "response": None,
-        }
-        start_perf_counter = time.perf_counter()
-        try:
-            request_meta["response"] = self.stub.Call(
-                request=pb.Request(
-                    api_name=request_meta["name"],
-                    ndarray=pb.NDArray(
-                        dtype=pb.NDArray.DTYPE_FLOAT,
-                        # shape=(1, 4),
-                        shape=(len(input_data), 4),
-                        # float_values=[5.9, 3, 5.1, 1.8],
-                        float_values=input_data.flatten(),
-                    ),
-                )
-            )
-        except grpc.RpcError as e:
-            request_meta["exception"] = e
-        request_meta["response_time"] = (
-            time.perf_counter() - start_perf_counter
-        ) * 1000
-        self.environment.events.request.fire(**request_meta)
-
-    wait_time = between(0.01, 2)
diff --git a/examples/sk-learn/bentofile.yaml b/examples/sk-learn/bentofile.yaml
deleted file mode 100644
index 9785152583b..00000000000
--- a/examples/sk-learn/bentofile.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-service: "service.py:IrisClassifier"
-labels:
-  owner: bentoml-team
-  project: gallery
-include:
-  - "*.py"
-python:
-  packages:
-    - scikit-learn
-    - pandas
-envs:
-- name: HF_TOKEN
-  value: xxx
diff --git a/examples/sk-learn/client.py b/examples/sk-learn/client.py
deleted file mode 100644
index f8665313356..00000000000
--- a/examples/sk-learn/client.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import numpy as np
-
-import bentoml
-
-input_series = [
-    [5.1, 3.5, 1.4, 0.2],
-    [6.2, 2.9, 4.3, 1.3],
-    [5.9, 3.0, 5.1, 1.8],
-    [4.6, 3.1, 1.5, 0.2],
-    [6.7, 3.1, 4.4, 1.4],
-    [5.5, 2.6, 4.4, 1.2],
-    [7.7, 3.0, 6.1, 2.3],
-    [4.9, 3.0, 1.4, 0.2],
-]
-
-with bentoml.SyncHTTPClient("http://localhost:3000") as client:
-    pred: np.ndarray = client.classify(input_series)
-    print(pred)
diff --git a/examples/sk-learn/prepare_model.py b/examples/sk-learn/prepare_model.py
deleted file mode 100644
index 5b84bd89c90..00000000000
--- a/examples/sk-learn/prepare_model.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import joblib
-from sklearn import datasets
-from sklearn import svm
-
-import bentoml
-
-if __name__ == "__main__":
-    # Load training data
-    iris = datasets.load_iris()
-    X, y = iris.data, iris.target  # type: ignore
-
-    # Model Training
-    clf = svm.SVC()
-    clf.fit(X, y)
-
-    # Save model to BentoML local model store
-    with bentoml.models.create("iris_sklearn") as bento_model:
-        joblib.dump(clf, bento_model.path_of("model.pkl"))
-    print(f"Model saved: {bento_model}")
diff --git a/examples/sk-learn/service.py b/examples/sk-learn/service.py
deleted file mode 100644
index e79da1a3bd6..00000000000
--- a/examples/sk-learn/service.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import numpy as np
-from pydantic import Field
-from typing_extensions import Annotated
-
-import bentoml
-from bentoml.validators import Shape
-
-
-@bentoml.service(
-    resources={
-        "cpu": "1",
-        "memory": "2Gi",
-    },
-)
-class IrisClassifier:
-    """
-    A simple Iris classification service using a sklearn model
-    """
-
-    # Load in the class scope to declare the model as a dependency of the service
-    iris_model = bentoml.models.get("iris_sklearn:latest")
-
-    def __init__(self):
-        """
-        Initialize the service by loading the model from the model store
-        """
-        import joblib
-
-        self.model = joblib.load(self.iris_model.path_of("model.pkl"))
-
-    @bentoml.api
-    def classify(
-        self,
-        input_series: Annotated[np.ndarray, Shape((-1, 4))] = Field(
-            default=[[5.2, 2.3, 5.0, 0.7]]
-        ),
-    ) -> np.ndarray:
-        """
-        Define API with preprocessing and model inference logic
-        """
-        return self.model.predict(input_series)
diff --git a/examples/xgboost/bentofile.yaml b/examples/xgboost/bentofile.yaml
deleted file mode 100644
index b8d71e6075b..00000000000
--- a/examples/xgboost/bentofile.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-service: "service:CancerClassifier"
-labels:
-  owner: bentoml-team
-  stage: demo
-include:
-  - "*.py"
-python:
-  requirements_txt: './requirements.txt'
diff --git a/examples/xgboost/requirements.txt b/examples/xgboost/requirements.txt
deleted file mode 100644
index 5178c15bc80..00000000000
--- a/examples/xgboost/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-scikit-learn
-xgboost
-bentoml
diff --git a/examples/xgboost/save_model.py b/examples/xgboost/save_model.py
deleted file mode 100644
index e6a1cf16e92..00000000000
--- a/examples/xgboost/save_model.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import typing as t
-
-import xgboost as xgb
-from sklearn.datasets import load_breast_cancer
-from sklearn.utils import Bunch
-
-import bentoml
-
-if t.TYPE_CHECKING:
-    from sklearn.utils import Bunch
-
-    from bentoml._internal import external_typing as ext
-
-# Load the data
-cancer: Bunch = t.cast("Bunch", load_breast_cancer())
-cancer_data = t.cast("ext.NpNDArray", cancer.data)
-cancer_target = t.cast("ext.NpNDArray", cancer.target)
-dt = xgb.DMatrix(cancer_data, label=cancer_target)
-
-# Specify model parameters
-param = {"max_depth": 3, "eta": 0.3, "objective": "multi:softprob", "num_class": 2}
-
-# Train the model
-model = xgb.train(param, dt)
-
-# Specify the model name and the model to be saved
-bentoml.xgboost.save_model("cancer", model)
diff --git a/examples/xgboost/service.py b/examples/xgboost/service.py
deleted file mode 100644
index 344ab0e70a7..00000000000
--- a/examples/xgboost/service.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import os
-
-import numpy as np
-import xgboost as xgb
-
-import bentoml
-
-
-@bentoml.service(
-    resources={"cpu": "2"},
-    traffic={"timeout": 10},
-)
-class CancerClassifier:
-    # Retrieve the latest version of the model from the BentoML model store
-    bento_model = bentoml.models.get("cancer:latest")
-
-    def __init__(self):
-        self.model = bentoml.xgboost.load_model(self.bento_model)
-
-        # Check resource availability
-        if os.getenv("CUDA_VISIBLE_DEVICES") not in (None, "", "-1"):
-            self.model.set_param({"predictor": "gpu_predictor", "gpu_id": 0})  # type: ignore (incomplete XGBoost types)
-        else:
-            nthreads = os.getenv("OMP_NUM_THREADS")
-            if nthreads:
-                nthreads = max(int(nthreads), 1)
-            else:
-                nthreads = 1
-            self.model.set_param({"predictor": "cpu_predictor", "nthread": nthreads})
-
-    @bentoml.api
-    def predict(self, data: np.ndarray) -> np.ndarray:
-        return self.model.predict(xgb.DMatrix(data))
diff --git a/examples/xgboost/test.py b/examples/xgboost/test.py
deleted file mode 100644
index a905ad9b23f..00000000000
--- a/examples/xgboost/test.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import xgboost as xgb
-
-import bentoml
-
-# Load the model by setting the model tag
-booster = bentoml.xgboost.load_model("cancer:latest")
-
-# Predict using a sample
-res = booster.predict(
-    xgb.DMatrix(
-        [
-            [
-                1.308e01,
-                1.571e01,
-                8.563e01,
-                5.200e02,
-                1.075e-01,
-                1.270e-01,
-                4.568e-02,
-                3.110e-02,
-                1.967e-01,
-                6.811e-02,
-                1.852e-01,
-                7.477e-01,
-                1.383e00,
-                1.467e01,
-                4.097e-03,
-                1.898e-02,
-                1.698e-02,
-                6.490e-03,
-                1.678e-02,
-                2.425e-03,
-                1.450e01,
-                2.049e01,
-                9.609e01,
-                6.305e02,
-                1.312e-01,
-                2.776e-01,
-                1.890e-01,
-                7.283e-02,
-                3.184e-01,
-                8.183e-02,
-            ]
-        ]
-    )
-)
-
-print(res)
-# Expected output: [[0.02664177 0.9733583 ]]
diff --git a/tests/e2e/bento_new_sdk/conftest.py b/tests/e2e/bento_new_sdk/conftest.py
index aee2511300f..9992a2ec3df 100644
--- a/tests/e2e/bento_new_sdk/conftest.py
+++ b/tests/e2e/bento_new_sdk/conftest.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-EXAMPLE_DIR = Path(__file__).parent.parent.parent.parent / "examples"
+EXAMPLE_DIR = Path(__file__).parent.parent / "fixtures"
 E2E_EXAMPLES = ["quickstart"]
 
 
diff --git a/examples/quickstart/README.md b/tests/e2e/fixtures/quickstart/README.md
similarity index 100%
rename from examples/quickstart/README.md
rename to tests/e2e/fixtures/quickstart/README.md
diff --git a/examples/quickstart/bentofile.yaml b/tests/e2e/fixtures/quickstart/bentofile.yaml
similarity index 100%
rename from examples/quickstart/bentofile.yaml
rename to tests/e2e/fixtures/quickstart/bentofile.yaml
diff --git a/examples/quickstart/requirements.txt b/tests/e2e/fixtures/quickstart/requirements.txt
similarity index 100%
rename from examples/quickstart/requirements.txt
rename to tests/e2e/fixtures/quickstart/requirements.txt
diff --git a/examples/quickstart/service.py b/tests/e2e/fixtures/quickstart/service.py
similarity index 100%
rename from examples/quickstart/service.py
rename to tests/e2e/fixtures/quickstart/service.py