From 9b1c77f949a03e6c6e979415e1ad3b708662770b Mon Sep 17 00:00:00 2001
From: kcelia <celia.kherfallah@zama.ai>
Date: Mon, 23 Sep 2024 15:13:34 +0200
Subject: [PATCH 1/5] chore: add use_gpu for cifar finetuning

---
 .../cifar_brevitas_finetuning/CifarInFhe.ipynb   |  8 +++++---
 .../CifarInFheWithSmallerAccumulators.ipynb      |  6 +++++-
 .../CifarQuantizationAwareTraining.ipynb         |  8 +++++---
 .../FromImageNetToCifar.ipynb                    |  4 +++-
 .../PerrorImpactOnFMNIST.ipynb                   | 10 ++++++++--
 .../cifar_brevitas_finetuning/cifar_utils.py     |  6 +++---
 use_case_examples/resnet/README.md               | 16 ++++++++--------
 7 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb
index 7e5e82e2b..da3aa471a 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb
@@ -32,6 +32,7 @@
     "    plot_dataset,\n",
     "    torch_inference,\n",
     ")\n",
+    "from concrete.compiler import check_gpu_available\n",
     "from models import QuantVGG11\n",
     "from torchvision import datasets\n",
     "\n",
@@ -62,7 +63,8 @@
     "bit = 5\n",
     "seed = 42\n",
     "\n",
-    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "use_gpu_if_available = False\n",
+    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
     "\n",
     "print(f\"Device Type: {device}\")"
    ]
@@ -206,7 +208,7 @@
     "\n",
     "data_calibration, _ = next(iter(train_loader_c10))\n",
     "\n",
-    "qmodel_c10 = fhe_compatibility(quant_vgg_c10, data_calibration)\n",
+    "qmodel_c10 = fhe_compatibility(quant_vgg_c10, data_calibration, device=device)\n",
     "\n",
     "print(\n",
     "    f\"Maximum bit-width in the circuit: {qmodel_c10.fhe_circuit.graph.maximum_integer_bit_width()}\"\n",
@@ -394,7 +396,7 @@
     "\n",
     "data_calibration, _ = next(iter(train_loader_c100))\n",
     "\n",
-    "qmodel_c100 = fhe_compatibility(quant_vgg_c100, data_calibration)\n",
+    "qmodel_c100 = fhe_compatibility(quant_vgg_c100, data_calibration, device=device)\n",
     "\n",
     "print(\n",
     "    f\"Maximum bit-width in the circuit: {qmodel_c100.fhe_circuit.graph.maximum_integer_bit_width()}\"\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb
index b348b2bb0..b76edd99e 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb
@@ -33,6 +33,8 @@
     "import matplotlib.pyplot as plt\n",
     "import torch\n",
     "from cifar_utils import fhe_simulation_inference, get_dataloader, torch_inference\n",
+    "from concrete.compiler import check_gpu_available\n",
+    "from concrete.fhe.compilation import Configuration\n",
     "from models import QuantVGG11\n",
     "from torch.utils.data.dataloader import DataLoader\n",
     "from torchvision import datasets\n",
@@ -59,7 +61,8 @@
     "seed = 42\n",
     "rounding_thresholds_bits = [8, 7, 6, 5, 3]\n",
     "\n",
-    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\""
+    "use_gpu_if_available = False\n",
+    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\""
    ]
   },
   {
@@ -91,6 +94,7 @@
     "            model.to(\"cpu\"),\n",
     "            torch_inputset=X_train,\n",
     "            rounding_threshold_bits=max_bitwidth,\n",
+    "            configuration=Configuration(use_gpu=(device == \"cuda\")),\n",
     "        )\n",
     "\n",
     "        acc_fhe_s = fhe_simulation_inference(qmodel, test_loader, True)\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb
index e29ed2c2b..6e834d91f 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb
@@ -40,6 +40,7 @@
     "    torch_inference,\n",
     "    train,\n",
     ")\n",
+    "from concrete.compiler import check_gpu_available\n",
     "\n",
     "# As we follow the same methodology for quantization aware training for CIFAR-10 and CIFAR-100.\n",
     "# Let's import some generic functions.\n",
@@ -88,7 +89,8 @@
    "source": [
     "bit = 5\n",
     "\n",
-    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "use_gpu_if_available = False\n",
+    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
     "\n",
     "print(f\"Device Type: {device}\")"
    ]
@@ -280,7 +282,7 @@
     "\n",
     "data_calibration, _ = next(iter(train_loader_c100))\n",
     "\n",
-    "qmodel = fhe_compatibility(quant_vgg, data_calibration)\n",
+    "qmodel = fhe_compatibility(quant_vgg, data_calibration, device=device)\n",
     "\n",
     "print(\n",
     "    f\"With {param_c100['dataset_name']}, the maximum bit-width in the circuit = \"\n",
@@ -544,7 +546,7 @@
     "# Check the FHE-compatibility.\n",
     "data, _ = next(iter(train_loader_c10))\n",
     "\n",
-    "qmodel = fhe_compatibility(quant_vgg, data)\n",
+    "qmodel = fhe_compatibility(quant_vgg, data, device=device)\n",
     "\n",
     "print(\n",
     "    f\"With {param_c10['dataset_name']}, the circuit has a maximum bit-width of \"\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb
index fdbb8ed9f..ed89359f2 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb
@@ -36,6 +36,7 @@
     "\n",
     "import torch\n",
     "from cifar_utils import get_dataloader, plot_dataset, plot_history, torch_inference, train\n",
+    "from concrete.compiler import check_gpu_available\n",
     "from models import Fp32VGG11\n",
     "\n",
     "warnings.filterwarnings(\"ignore\")"
@@ -64,7 +65,8 @@
    "source": [
     "dataset_name = \"CIFAR_100\"\n",
     "\n",
-    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "use_gpu_if_available = False\n",
+    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
     "\n",
     "param_c10 = {\n",
     "    \"output_size\": 10,\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb
index 10fba027e..746a19eb7 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb
@@ -36,6 +36,8 @@
     "import numpy\n",
     "import torch\n",
     "from cifar_utils import get_dataloader, mapping_keys, plot_dataset, torch_inference, train\n",
+    "from concrete.compiler import check_gpu_available\n",
+    "from concrete.fhe.compilation import Configuration\n",
     "from sklearn.metrics import top_k_accuracy_score\n",
     "\n",
     "from concrete.ml.pytest.torch_models import QNNFashionMNIST\n",
@@ -73,7 +75,8 @@
     "    \"seed\": 42,\n",
     "}\n",
     "\n",
-    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "use_gpu_if_available = False\n",
+    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
     "\n",
     "print(f\"Device Type: {device}\")"
    ]
@@ -245,7 +248,10 @@
     "\n",
     "    start_time = time()\n",
     "    qmodel = compile_brevitas_qat_model(\n",
-    "        torch_model=quant_model, torch_inputset=X_calib, p_error=p_error\n",
+    "        torch_model=quant_model,\n",
+    "        torch_inputset=X_calib,\n",
+    "        p_error=p_error,\n",
+    "        configuration=Configuration(use_gpu=(device == \"cuda\")),\n",
     "    )\n",
     "    compilation_time.append((time() - start_time) / 60.0)\n",
     "\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py b/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py
index 641ec530b..ac335af7d 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py
@@ -4,7 +4,6 @@
 import warnings
 from collections import OrderedDict
 from pathlib import Path
-from time import time
 from typing import Callable, Dict, Optional, Tuple
 
 import matplotlib.pyplot as plt
@@ -14,7 +13,6 @@
 from brevitas import config
 from concrete.fhe.compilation import Configuration
 from models import Fp32VGG11
-from sklearn.metrics import top_k_accuracy_score
 from torch.utils.data.dataloader import DataLoader
 from torchvision import datasets, transforms
 from torchvision.utils import make_grid
@@ -441,12 +439,13 @@ def torch_inference(
     return np.mean(np.vstack(correct), dtype="float64")
 
 
-def fhe_compatibility(model: Callable, data: DataLoader) -> Callable:
+def fhe_compatibility(model: Callable, data: DataLoader, device: str) -> Callable:
     """Test if the model is FHE-compatible.
 
     Args:
         model (Callable): The Brevitas model.
         data (DataLoader): The data loader.
+        device (str): Specifies the device to run on, either 'cpu' or 'gpu'.
 
     Returns:
         Callable: Quantized model.
@@ -458,6 +457,7 @@ def fhe_compatibility(model: Callable, data: DataLoader) -> Callable:
         torch_inputset=data,
         show_mlir=False,
         output_onnx_file="test.onnx",
+        configuration=Configuration(use_gpu=(device == "cuda")),
     )
 
     return qmodel
diff --git a/use_case_examples/resnet/README.md b/use_case_examples/resnet/README.md
index f787d8f2a..6c43e4b1f 100644
--- a/use_case_examples/resnet/README.md
+++ b/use_case_examples/resnet/README.md
@@ -104,14 +104,14 @@ GPU machine: 8xH100 GPU machine
 
 Summary of the accuracy evaluation on ImageNet (100 images):
 
-| w&a bits | p_error | Accuracy | Top-5 Accuracy | Runtime*        | Device |
-| -------- | ------- | -------- | -------------- | --------------- | ------ |
-| fp32     | -       | 67%      | 87%            | -               | -      |
-| 6/6      | 0.05    | 55%      | 78%            | 56 min          | GPU    |
-| 6/6      | 0.05    | 55%      | 78%            | 1 h 31 min      | CPU    |
-| 7/7      | 0.05    | **66%**  | **87%**        | **2 h 12 min**  | CPU    |
-
-*Runtime reported to run the inference on a single image
+| w&a bits | p_error | Accuracy | Top-5 Accuracy | Runtime\*      | Device |
+| -------- | ------- | -------- | -------------- | -------------- | ------ |
+| fp32     | -       | 67%      | 87%            | -              | -      |
+| 6/6      | 0.05    | 55%      | 78%            | 56 min         | GPU    |
+| 6/6      | 0.05    | 55%      | 78%            | 1 h 31 min     | CPU    |
+| 7/7      | 0.05    | **66%**  | **87%**        | **2 h 12 min** | CPU    |
+
+\*Runtime reported to run the inference on a single image
 
 6/6 `n_bits` configuration: {"model_inputs": 8, "op_inputs": 6, "op_weights": 6, "model_outputs": 9}
 

From 17bdca25ab6203abbc26aa5c1af07acdbcda1e77 Mon Sep 17 00:00:00 2001
From: kcelia <celia.kherfallah@zama.ai>
Date: Mon, 23 Sep 2024 16:18:08 +0200
Subject: [PATCH 2/5] chore: update cifar finetuning

---
 .../CifarInFhe.ipynb                          | 37 ++++++++++++++++---
 .../CifarInFheWithSmallerAccumulators.ipynb   | 27 +++++++++++---
 .../CifarQuantizationAwareTraining.ipynb      | 29 ++++++++++++---
 .../FromImageNetToCifar.ipynb                 |  4 +-
 .../PerrorImpactOnFMNIST.ipynb                | 32 +++++++++++++---
 .../cifar_brevitas_finetuning/cifar_utils.py  |  4 +-
 6 files changed, 107 insertions(+), 26 deletions(-)

diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb
index da3aa471a..4b1963574 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb
@@ -24,6 +24,7 @@
    "source": [
     "import warnings\n",
     "\n",
+    "import concrete.compiler\n",
     "import torch\n",
     "from cifar_utils import (\n",
     "    fhe_compatibility,\n",
@@ -32,7 +33,6 @@
     "    plot_dataset,\n",
     "    torch_inference,\n",
     ")\n",
-    "from concrete.compiler import check_gpu_available\n",
     "from models import QuantVGG11\n",
     "from torchvision import datasets\n",
     "\n",
@@ -63,12 +63,39 @@
     "bit = 5\n",
     "seed = 42\n",
     "\n",
-    "use_gpu_if_available = False\n",
-    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
     "\n",
     "print(f\"Device Type: {device}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Concrete ML also supports a CUDA-enabled backend. To set it up, follow the instructions in the official [guide](../../../docs/guides/using_gpu.md) for installing the GPU-enabled Concrete compiler."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Is GPU enabled: False\n",
+      "Is GPU available: False\n"
+     ]
+    }
+   ],
+   "source": [
+    "compilation_device = \"cuda\" if concrete.compiler.check_gpu_available() else \"cpu\"\n",
+    "\n",
+    "print(f\"Is GPU enabled: {concrete.compiler.check_gpu_enabled()}\")\n",
+    "print(f\"Is GPU available: {concrete.compiler.check_gpu_available()}\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -208,7 +235,7 @@
     "\n",
     "data_calibration, _ = next(iter(train_loader_c10))\n",
     "\n",
-    "qmodel_c10 = fhe_compatibility(quant_vgg_c10, data_calibration, device=device)\n",
+    "qmodel_c10 = fhe_compatibility(quant_vgg_c10, data_calibration, device=compilation_device)\n",
     "\n",
     "print(\n",
     "    f\"Maximum bit-width in the circuit: {qmodel_c10.fhe_circuit.graph.maximum_integer_bit_width()}\"\n",
@@ -396,7 +423,7 @@
     "\n",
     "data_calibration, _ = next(iter(train_loader_c100))\n",
     "\n",
-    "qmodel_c100 = fhe_compatibility(quant_vgg_c100, data_calibration, device=device)\n",
+    "qmodel_c100 = fhe_compatibility(quant_vgg_c100, data_calibration, device=compilation_device)\n",
     "\n",
     "print(\n",
     "    f\"Maximum bit-width in the circuit: {qmodel_c100.fhe_circuit.graph.maximum_integer_bit_width()}\"\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb
index b76edd99e..e5bbda90d 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb
@@ -30,11 +30,10 @@
     "import warnings\n",
     "from typing import Callable, List, Tuple\n",
     "\n",
+    "import concrete.compiler\n",
     "import matplotlib.pyplot as plt\n",
     "import torch\n",
     "from cifar_utils import fhe_simulation_inference, get_dataloader, torch_inference\n",
-    "from concrete.compiler import check_gpu_available\n",
-    "from concrete.fhe.compilation import Configuration\n",
     "from models import QuantVGG11\n",
     "from torch.utils.data.dataloader import DataLoader\n",
     "from torchvision import datasets\n",
@@ -61,8 +60,26 @@
     "seed = 42\n",
     "rounding_thresholds_bits = [8, 7, 6, 5, 3]\n",
     "\n",
-    "use_gpu_if_available = False\n",
-    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\""
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Concrete ML also supports a CUDA-enabled backend. To set it up, follow the instructions in the official [guide](../../../docs/guides/using_gpu.md) for installing the GPU-enabled Concrete compiler."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compilation_device = \"cuda\" if concrete.compiler.check_gpu_available() else \"cpu\"\n",
+    "\n",
+    "print(f\"Is GPU enabled: {concrete.compiler.check_gpu_enabled()}\")\n",
+    "print(f\"Is GPU available: {concrete.compiler.check_gpu_available()}\")"
    ]
   },
   {
@@ -94,7 +111,7 @@
     "            model.to(\"cpu\"),\n",
     "            torch_inputset=X_train,\n",
     "            rounding_threshold_bits=max_bitwidth,\n",
-    "            configuration=Configuration(use_gpu=(device == \"cuda\")),\n",
+    "            device=compilation_device,\n",
     "        )\n",
     "\n",
     "        acc_fhe_s = fhe_simulation_inference(qmodel, test_loader, True)\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb
index 6e834d91f..12321a7dc 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb
@@ -30,6 +30,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import concrete.compiler\n",
     "import torch\n",
     "from cifar_utils import (\n",
     "    fhe_compatibility,\n",
@@ -40,7 +41,6 @@
     "    torch_inference,\n",
     "    train,\n",
     ")\n",
-    "from concrete.compiler import check_gpu_available\n",
     "\n",
     "# As we follow the same methodology for quantization aware training for CIFAR-10 and CIFAR-100.\n",
     "# Let's import some generic functions.\n",
@@ -88,13 +88,32 @@
    ],
    "source": [
     "bit = 5\n",
+    "seed = 42\n",
     "\n",
-    "use_gpu_if_available = False\n",
-    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
     "\n",
     "print(f\"Device Type: {device}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Concrete ML also supports a CUDA-enabled backend. To set it up, follow the instructions in the official [guide](../../../docs/guides/using_gpu.md) for installing the GPU-enabled Concrete compiler."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compilation_device = \"cuda\" if concrete.compiler.check_gpu_available() else \"cpu\"\n",
+    "\n",
+    "print(f\"Is GPU enabled: {concrete.compiler.check_gpu_enabled()}\")\n",
+    "print(f\"Is GPU available: {concrete.compiler.check_gpu_available()}\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -282,7 +301,7 @@
     "\n",
     "data_calibration, _ = next(iter(train_loader_c100))\n",
     "\n",
-    "qmodel = fhe_compatibility(quant_vgg, data_calibration, device=device)\n",
+    "qmodel = fhe_compatibility(quant_vgg, data_calibration, device=compilation_device)\n",
     "\n",
     "print(\n",
     "    f\"With {param_c100['dataset_name']}, the maximum bit-width in the circuit = \"\n",
@@ -546,7 +565,7 @@
     "# Check the FHE-compatibility.\n",
     "data, _ = next(iter(train_loader_c10))\n",
     "\n",
-    "qmodel = fhe_compatibility(quant_vgg, data, device=device)\n",
+    "qmodel = fhe_compatibility(quant_vgg, data, device=compilation_device)\n",
     "\n",
     "print(\n",
     "    f\"With {param_c10['dataset_name']}, the circuit has a maximum bit-width of \"\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb
index ed89359f2..fdbb8ed9f 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb
@@ -36,7 +36,6 @@
     "\n",
     "import torch\n",
     "from cifar_utils import get_dataloader, plot_dataset, plot_history, torch_inference, train\n",
-    "from concrete.compiler import check_gpu_available\n",
     "from models import Fp32VGG11\n",
     "\n",
     "warnings.filterwarnings(\"ignore\")"
@@ -65,8 +64,7 @@
    "source": [
     "dataset_name = \"CIFAR_100\"\n",
     "\n",
-    "use_gpu_if_available = False\n",
-    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
     "\n",
     "param_c10 = {\n",
     "    \"output_size\": 10,\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb
index 746a19eb7..a54be16a5 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb
@@ -32,12 +32,11 @@
     "from itertools import chain\n",
     "from time import time\n",
     "\n",
+    "import concrete.compiler\n",
     "import matplotlib.pylab as plt\n",
     "import numpy\n",
     "import torch\n",
     "from cifar_utils import get_dataloader, mapping_keys, plot_dataset, torch_inference, train\n",
-    "from concrete.compiler import check_gpu_available\n",
-    "from concrete.fhe.compilation import Configuration\n",
     "from sklearn.metrics import top_k_accuracy_score\n",
     "\n",
     "from concrete.ml.pytest.torch_models import QNNFashionMNIST\n",
@@ -75,12 +74,30 @@
     "    \"seed\": 42,\n",
     "}\n",
     "\n",
-    "use_gpu_if_available = False\n",
-    "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
     "\n",
     "print(f\"Device Type: {device}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Concrete ML also supports a CUDA-enabled backend. To set it up, follow the instructions in the official [guide](../../../docs/guides/using_gpu.md) for installing the GPU-enabled Concrete compiler."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "compilation_device = \"cuda\" if concrete.compiler.check_gpu_available() else \"cpu\"\n",
+    "\n",
+    "print(f\"Is GPU enabled: {concrete.compiler.check_gpu_enabled()}\")\n",
+    "print(f\"Is GPU available: {concrete.compiler.check_gpu_available()}\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -251,7 +268,7 @@
     "        torch_model=quant_model,\n",
     "        torch_inputset=X_calib,\n",
     "        p_error=p_error,\n",
-    "        configuration=Configuration(use_gpu=(device == \"cuda\")),\n",
+    "        device=compilation_device,\n",
     "    )\n",
     "    compilation_time.append((time() - start_time) / 60.0)\n",
     "\n",
@@ -359,7 +376,10 @@
     "\n",
     "# Compile the model with the optimal `p_error`\n",
     "qmodel = compile_brevitas_qat_model(\n",
-    "    torch_model=quant_model, torch_inputset=X_calib, p_error=largest_p_error\n",
+    "    torch_model=quant_model,\n",
+    "    torch_inputset=X_calib,\n",
+    "    p_error=largest_p_error,\n",
+    "    device=compilation_device,\n",
     ")\n",
     "\n",
     "# Key Generation\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py b/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py
index ac335af7d..b77d24345 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py
@@ -445,7 +445,7 @@ def fhe_compatibility(model: Callable, data: DataLoader, device: str) -> Callabl
     Args:
         model (Callable): The Brevitas model.
         data (DataLoader): The data loader.
-        device (str): Specifies the device to run on, either 'cpu' or 'gpu'.
+        device (str): Device used for the compilation, either 'cpu' or 'cuda'.
 
     Returns:
         Callable: Quantized model.
@@ -457,7 +457,7 @@ def fhe_compatibility(model: Callable, data: DataLoader, device: str) -> Callabl
         torch_inputset=data,
         show_mlir=False,
         output_onnx_file="test.onnx",
-        configuration=Configuration(use_gpu=(device == "cuda")),
+        device=device,
     )
 
     return qmodel

From b5e215d406e6d77a9380ba45901cdbad96c4dae1 Mon Sep 17 00:00:00 2001
From: kcelia <celia.kherfallah@zama.ai>
Date: Tue, 24 Sep 2024 16:00:30 +0200
Subject: [PATCH 3/5] chore: add use_gpu to cifar training

---
 .../cifar/cifar_brevitas_training/evaluate_one_example_fhe.py | 4 ++++
 .../cifar/cifar_brevitas_training/evaluate_torch_cml.py       | 1 +
 2 files changed, 5 insertions(+)

diff --git a/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py b/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py
index afec3d6a8..1aca13336 100644
--- a/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py
+++ b/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 
 import torch
+from concrete.compiler import check_gpu_available
 from concrete.fhe import Exactness
 from concrete.fhe.compilation.configuration import Configuration
 from models import cnv_2w2a
@@ -22,6 +23,8 @@
 # observe a decrease in torch's top1 accuracy when using MPS devices
 # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3953
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+COMPILATION_DEVICE = "cuda" if check_gpu_available() else "cpu"
+
 NUM_SAMPLES = int(os.environ.get("NUM_SAMPLES", 1))
 P_ERROR = float(os.environ.get("P_ERROR", 0.01))
 
@@ -93,6 +96,7 @@ def wrapper(*args, **kwargs):
     configuration=configuration,
     rounding_threshold_bits={"method": Exactness.APPROXIMATE, "n_bits": 6},
     p_error=P_ERROR,
+    device=COMPILATION_DEVICE,
 )
 assert isinstance(quantized_numpy_module, QuantizedModule)
 
diff --git a/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py b/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py
index 1e2ceb5b7..5838eba00 100644
--- a/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py
+++ b/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py
@@ -123,6 +123,7 @@ def main(args):
                 if rounding_threshold_bits is not None
                 else None
             ),
+            device=COMPILATION_DEVICE,
         )
 
         # Print max bit-width in the circuit

From 46b831e20efe3c28d3483b1ab1a1bacee47a47d7 Mon Sep 17 00:00:00 2001
From: kcelia <celia.kherfallah@zama.ai>
Date: Tue, 24 Sep 2024 16:00:52 +0200
Subject: [PATCH 4/5] chore: add use_gpu to resnet18

---
 use_case_examples/resnet/run_resnet18_fhe.py | 25 +++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/use_case_examples/resnet/run_resnet18_fhe.py b/use_case_examples/resnet/run_resnet18_fhe.py
index 9cdf54973..f04a6d5cd 100644
--- a/use_case_examples/resnet/run_resnet18_fhe.py
+++ b/use_case_examples/resnet/run_resnet18_fhe.py
@@ -3,6 +3,7 @@
 import time
 from pathlib import Path
 
+import concrete.compiler
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
@@ -53,7 +54,12 @@ def evaluate_model(model, processor):
 
 
 def compile_model(
-    model, images, n_bits, rounding_threshold_bits=None, fhe_mode="disable", use_gpu=False
+    model,
+    images,
+    n_bits,
+    rounding_threshold_bits=None,
+    fhe_mode="disable",
+    compilation_device="cpu",
 ):
     """
     Compile the model using either build_quantized_module or compile_torch_model.
@@ -70,7 +76,7 @@ def compile_model(
                     }
         rounding_threshold_bits: The rounding threshold bits.
         fhe_mode: The FHE mode ('disable' or 'simulate').
-        use_gpu: Whether to use GPU for compilation.
+        compilation_device: Device used for compilation, either 'cuda' or 'cpu'.
 
     Returns:
         The compiled quantized module.
@@ -85,7 +91,7 @@ def compile_model(
     }
 
     if fhe_mode != "disable":
-        config = Configuration(enable_tlu_fusing=True, print_tlu_fusing=False, use_gpu=use_gpu)
+        config = Configuration(enable_tlu_fusing=True, print_tlu_fusing=False)
         compile_config.update(
             {
                 "p_error": 0.05,
@@ -97,7 +103,7 @@ def compile_model(
         compile_func = build_quantized_module
 
     print(f"Compiling the model with {compile_func.__name__}...")
-    return compile_func(model, torch_inputset=images, **compile_config)
+    return compile_func(model, torch_inputset=images, **compile_config, device=compilation_device)
 
 
 def export_statistics(q_module):
@@ -270,7 +276,7 @@ def main():
         "--export_statistics", action="store_true", help="Export the circuit statistics."
     )
     parser.add_argument(
-        "--use_gpu", action="store_true", help="Use the available GPU at FHE runtime."
+        "--use_gpu", type=bool, action="store_true", help="Use the available GPU at FHE runtime."
     )
     parser.add_argument(
         "--run_experiment",
@@ -291,6 +297,11 @@ def main():
     )
     args = parser.parse_args()
 
+    if args.use_gpu and not concrete.compiler.check_gpu_available():
+        print("Follow the GPU setup guide to install the GPU-enabled Concrete ML compiler.")
+        print("GPU Enabled:", concrete.compiler.check_gpu_enabled())
+        print("GPU Available:", concrete.compiler.check_gpu_available())
+
     resnet18 = load_model()
     processor = ImageNetProcessor(
         NUM_TEST_SAMPLES, CALIBRATION_SAMPLES, cache_dir=args.dataset_cache_dir
@@ -309,7 +320,9 @@ def main():
             n_bits={"model_inputs": 8, "op_inputs": 7, "op_weights": 7, "model_outputs": 9},
             rounding_threshold_bits=7,
             fhe_mode="simulate",
-            use_gpu=args.use_gpu,
+            compilation_device=(
+                "cuda" if args.use_gpu and concrete.compiler.check_gpu_available() else "cpu"
+            ),
         )
 
         if args.export_statistics:

From e4934c867135dea9adb6d30ec1c80d0a6bf4e98f Mon Sep 17 00:00:00 2001
From: kcelia <celia.kherfallah@zama.ai>
Date: Wed, 25 Sep 2024 14:26:11 +0200
Subject: [PATCH 5/5] chore: fix compilation device and --use_gpu flag in examples

---
 .../CifarQuantizationAwareTraining.ipynb              |  1 -
 .../cifar_brevitas_training/evaluate_torch_cml.py     | 11 +++++++++--
 use_case_examples/resnet/run_resnet18_fhe.py          |  2 +-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb
index 12321a7dc..42702b6c7 100644
--- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb
+++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb
@@ -88,7 +88,6 @@
    ],
    "source": [
     "bit = 5\n",
-    "seed = 42\n",
     "\n",
     "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
     "\n",
diff --git a/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py b/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py
index 5838eba00..04c7dce52 100644
--- a/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py
+++ b/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py
@@ -1,6 +1,7 @@
 import argparse
 from pathlib import Path
 
+import concrete.compiler
 import numpy as np
 import torch
 from concrete.fhe import Configuration
@@ -74,8 +75,14 @@ def main(args):
     # observe a decrease in torch's top1 accuracy when using MPS devices
     # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3953
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    compilation_device = "cuda" if concrete.compiler.check_gpu_available() else "cpu"
 
-    print("Device in use:", device)
+    print("Torch device in use:", device)
+    print(
+        "To leverage the CUDA backend, follow the GPU setup guide to install the Concrete ML compiler."
+    )
+    print("GPU Enabled:", concrete.compiler.check_gpu_enabled())
+    print("GPU Available:", concrete.compiler.check_gpu_available())
 
     # Find relative path to this file
     dir_path = Path(__file__).parent.absolute()
@@ -123,7 +130,7 @@ def main(args):
                 if rounding_threshold_bits is not None
                 else None
             ),
-            device=COMPILATION_DEVICE,
+            device=compilation_device,
         )
 
         # Print max bit-width in the circuit
diff --git a/use_case_examples/resnet/run_resnet18_fhe.py b/use_case_examples/resnet/run_resnet18_fhe.py
index f04a6d5cd..2e7a3a74d 100644
--- a/use_case_examples/resnet/run_resnet18_fhe.py
+++ b/use_case_examples/resnet/run_resnet18_fhe.py
@@ -276,7 +276,7 @@ def main():
         "--export_statistics", action="store_true", help="Export the circuit statistics."
     )
     parser.add_argument(
-        "--use_gpu", type=bool, action="store_true", help="Use the available GPU at FHE runtime."
+        "--use_gpu", action="store_true", help="Use the available GPU at FHE runtime."
     )
     parser.add_argument(
         "--run_experiment",