From 9b1c77f949a03e6c6e979415e1ad3b708662770b Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 23 Sep 2024 15:13:34 +0200 Subject: [PATCH 1/5] chore: add use_gpu for cifar finetuning --- .../cifar_brevitas_finetuning/CifarInFhe.ipynb | 8 +++++--- .../CifarInFheWithSmallerAccumulators.ipynb | 6 +++++- .../CifarQuantizationAwareTraining.ipynb | 8 +++++--- .../FromImageNetToCifar.ipynb | 4 +++- .../PerrorImpactOnFMNIST.ipynb | 10 ++++++++-- .../cifar_brevitas_finetuning/cifar_utils.py | 6 +++--- use_case_examples/resnet/README.md | 16 ++++++++-------- 7 files changed, 37 insertions(+), 21 deletions(-) diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb index 7e5e82e2b..da3aa471a 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb @@ -32,6 +32,7 @@ " plot_dataset,\n", " torch_inference,\n", ")\n", + "from concrete.compiler import check_gpu_available\n", "from models import QuantVGG11\n", "from torchvision import datasets\n", "\n", @@ -62,7 +63,8 @@ "bit = 5\n", "seed = 42\n", "\n", - "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "use_gpu_if_available = False\n", + "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n", "\n", "print(f\"Device Type: {device}\")" ] @@ -206,7 +208,7 @@ "\n", "data_calibration, _ = next(iter(train_loader_c10))\n", "\n", - "qmodel_c10 = fhe_compatibility(quant_vgg_c10, data_calibration)\n", + "qmodel_c10 = fhe_compatibility(quant_vgg_c10, data_calibration, device=device)\n", "\n", "print(\n", " f\"Maximum bit-width in the circuit: {qmodel_c10.fhe_circuit.graph.maximum_integer_bit_width()}\"\n", @@ -394,7 +396,7 @@ "\n", "data_calibration, _ = next(iter(train_loader_c100))\n", "\n", - "qmodel_c100 = fhe_compatibility(quant_vgg_c100, data_calibration)\n", + "qmodel_c100 = 
fhe_compatibility(quant_vgg_c100, data_calibration, device=device)\n", "\n", "print(\n", " f\"Maximum bit-width in the circuit: {qmodel_c100.fhe_circuit.graph.maximum_integer_bit_width()}\"\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb index b348b2bb0..b76edd99e 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb @@ -33,6 +33,8 @@ "import matplotlib.pyplot as plt\n", "import torch\n", "from cifar_utils import fhe_simulation_inference, get_dataloader, torch_inference\n", + "from concrete.compiler import check_gpu_available\n", + "from concrete.fhe.compilation import Configuration\n", "from models import QuantVGG11\n", "from torch.utils.data.dataloader import DataLoader\n", "from torchvision import datasets\n", @@ -59,7 +61,8 @@ "seed = 42\n", "rounding_thresholds_bits = [8, 7, 6, 5, 3]\n", "\n", - "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"" + "use_gpu_if_available = False\n", + "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"" ] }, { @@ -91,6 +94,7 @@ " model.to(\"cpu\"),\n", " torch_inputset=X_train,\n", " rounding_threshold_bits=max_bitwidth,\n", + " configuration=Configuration(use_gpu=(device == \"cuda\")),\n", " )\n", "\n", " acc_fhe_s = fhe_simulation_inference(qmodel, test_loader, True)\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb index e29ed2c2b..6e834d91f 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb @@ -40,6 +40,7 @@ " 
torch_inference,\n", " train,\n", ")\n", + "from concrete.compiler import check_gpu_available\n", "\n", "# As we follow the same methodology for quantization aware training for CIFAR-10 and CIFAR-100.\n", "# Let's import some generic functions.\n", @@ -88,7 +89,8 @@ "source": [ "bit = 5\n", "\n", - "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "use_gpu_if_available = False\n", + "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n", "\n", "print(f\"Device Type: {device}\")" ] @@ -280,7 +282,7 @@ "\n", "data_calibration, _ = next(iter(train_loader_c100))\n", "\n", - "qmodel = fhe_compatibility(quant_vgg, data_calibration)\n", + "qmodel = fhe_compatibility(quant_vgg, data_calibration, device=device)\n", "\n", "print(\n", " f\"With {param_c100['dataset_name']}, the maximum bit-width in the circuit = \"\n", @@ -544,7 +546,7 @@ "# Check the FHE-compatibility.\n", "data, _ = next(iter(train_loader_c10))\n", "\n", - "qmodel = fhe_compatibility(quant_vgg, data)\n", + "qmodel = fhe_compatibility(quant_vgg, data, device=device)\n", "\n", "print(\n", " f\"With {param_c10['dataset_name']}, the circuit has a maximum bit-width of \"\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb index fdbb8ed9f..ed89359f2 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb @@ -36,6 +36,7 @@ "\n", "import torch\n", "from cifar_utils import get_dataloader, plot_dataset, plot_history, torch_inference, train\n", + "from concrete.compiler import check_gpu_available\n", "from models import Fp32VGG11\n", "\n", "warnings.filterwarnings(\"ignore\")" @@ -64,7 +65,8 @@ "source": [ "dataset_name = \"CIFAR_100\"\n", "\n", - "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "use_gpu_if_available = 
False\n", + "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n", "\n", "param_c10 = {\n", " \"output_size\": 10,\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb index 10fba027e..746a19eb7 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb @@ -36,6 +36,8 @@ "import numpy\n", "import torch\n", "from cifar_utils import get_dataloader, mapping_keys, plot_dataset, torch_inference, train\n", + "from concrete.compiler import check_gpu_available\n", + "from concrete.fhe.compilation import Configuration\n", "from sklearn.metrics import top_k_accuracy_score\n", "\n", "from concrete.ml.pytest.torch_models import QNNFashionMNIST\n", @@ -73,7 +75,8 @@ " \"seed\": 42,\n", "}\n", "\n", - "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "use_gpu_if_available = False\n", + "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n", "\n", "print(f\"Device Type: {device}\")" ] @@ -245,7 +248,10 @@ "\n", " start_time = time()\n", " qmodel = compile_brevitas_qat_model(\n", - " torch_model=quant_model, torch_inputset=X_calib, p_error=p_error\n", + " torch_model=quant_model,\n", + " torch_inputset=X_calib,\n", + " p_error=p_error,\n", + " configuration=Configuration(use_gpu=(device == \"cuda\")),\n", " )\n", " compilation_time.append((time() - start_time) / 60.0)\n", "\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py b/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py index 641ec530b..ac335af7d 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py @@ -4,7 +4,6 @@ import warnings from collections import OrderedDict from pathlib 
import Path -from time import time from typing import Callable, Dict, Optional, Tuple import matplotlib.pyplot as plt @@ -14,7 +13,6 @@ from brevitas import config from concrete.fhe.compilation import Configuration from models import Fp32VGG11 -from sklearn.metrics import top_k_accuracy_score from torch.utils.data.dataloader import DataLoader from torchvision import datasets, transforms from torchvision.utils import make_grid @@ -441,12 +439,13 @@ def torch_inference( return np.mean(np.vstack(correct), dtype="float64") -def fhe_compatibility(model: Callable, data: DataLoader) -> Callable: +def fhe_compatibility(model: Callable, data: DataLoader, device: str) -> Callable: """Test if the model is FHE-compatible. Args: model (Callable): The Brevitas model. data (DataLoader): The data loader. + device (str): Specifies the device to run on, either 'cpu' or 'gpu'. Returns: Callable: Quantized model. @@ -458,6 +457,7 @@ def fhe_compatibility(model: Callable, data: DataLoader) -> Callable: torch_inputset=data, show_mlir=False, output_onnx_file="test.onnx", + configuration=Configuration(use_gpu=(device == "cuda")), ) return qmodel diff --git a/use_case_examples/resnet/README.md b/use_case_examples/resnet/README.md index f787d8f2a..6c43e4b1f 100644 --- a/use_case_examples/resnet/README.md +++ b/use_case_examples/resnet/README.md @@ -104,14 +104,14 @@ GPU machine: 8xH100 GPU machine Summary of the accuracy evaluation on ImageNet (100 images): -| w&a bits | p_error | Accuracy | Top-5 Accuracy | Runtime* | Device | -| -------- | ------- | -------- | -------------- | --------------- | ------ | -| fp32 | - | 67% | 87% | - | - | -| 6/6 | 0.05 | 55% | 78% | 56 min | GPU | -| 6/6 | 0.05 | 55% | 78% | 1 h 31 min | CPU | -| 7/7 | 0.05 | **66%** | **87%** | **2 h 12 min** | CPU | - -*Runtime reported to run the inference on a single image +| w&a bits | p_error | Accuracy | Top-5 Accuracy | Runtime\* | Device | +| -------- | ------- | -------- | -------------- | -------------- | ------ 
| +| fp32 | - | 67% | 87% | - | - | +| 6/6 | 0.05 | 55% | 78% | 56 min | GPU | +| 6/6 | 0.05 | 55% | 78% | 1 h 31 min | CPU | +| 7/7 | 0.05 | **66%** | **87%** | **2 h 12 min** | CPU | + +\*Runtime reported to run the inference on a single image 6/6 `n_bits` configuration: {"model_inputs": 8, "op_inputs": 6, "op_weights": 6, "model_outputs": 9} From 17bdca25ab6203abbc26aa5c1af07acdbcda1e77 Mon Sep 17 00:00:00 2001 From: kcelia Date: Mon, 23 Sep 2024 16:18:08 +0200 Subject: [PATCH 2/5] chore: update cifar finetuning --- .../CifarInFhe.ipynb | 37 ++++++++++++++++--- .../CifarInFheWithSmallerAccumulators.ipynb | 27 +++++++++++--- .../CifarQuantizationAwareTraining.ipynb | 29 ++++++++++++--- .../FromImageNetToCifar.ipynb | 4 +- .../PerrorImpactOnFMNIST.ipynb | 32 +++++++++++++--- .../cifar_brevitas_finetuning/cifar_utils.py | 4 +- 6 files changed, 107 insertions(+), 26 deletions(-) diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb index da3aa471a..4b1963574 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFhe.ipynb @@ -24,6 +24,7 @@ "source": [ "import warnings\n", "\n", + "import concrete.compiler\n", "import torch\n", "from cifar_utils import (\n", " fhe_compatibility,\n", @@ -32,7 +33,6 @@ " plot_dataset,\n", " torch_inference,\n", ")\n", - "from concrete.compiler import check_gpu_available\n", "from models import QuantVGG11\n", "from torchvision import datasets\n", "\n", @@ -63,12 +63,39 @@ "bit = 5\n", "seed = 42\n", "\n", - "use_gpu_if_available = False\n", - "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "print(f\"Device Type: {device}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Concrete ML also supports a CUDA-enabled backend. 
To set it up, follow the instructions in the official [guide](../../../docs/guides/using_gpu.md) for installing the GPU-enabled Concrete compiler." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Is GPU enabled: False\n", + "Is GPU available: False\n" + ] + } + ], + "source": [ + "compilation_device = \"cuda\" if concrete.compiler.check_gpu_available() else \"cpu\"\n", + "\n", + "print(f\"Is GPU enabled: {concrete.compiler.check_gpu_enabled()}\")\n", + "print(f\"Is GPU available: {concrete.compiler.check_gpu_available()}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -208,7 +235,7 @@ "\n", "data_calibration, _ = next(iter(train_loader_c10))\n", "\n", - "qmodel_c10 = fhe_compatibility(quant_vgg_c10, data_calibration, device=device)\n", + "qmodel_c10 = fhe_compatibility(quant_vgg_c10, data_calibration, device=compilation_device)\n", "\n", "print(\n", " f\"Maximum bit-width in the circuit: {qmodel_c10.fhe_circuit.graph.maximum_integer_bit_width()}\"\n", @@ -396,7 +423,7 @@ "\n", "data_calibration, _ = next(iter(train_loader_c100))\n", "\n", - "qmodel_c100 = fhe_compatibility(quant_vgg_c100, data_calibration, device=device)\n", + "qmodel_c100 = fhe_compatibility(quant_vgg_c100, data_calibration, device=compilation_device)\n", "\n", "print(\n", " f\"Maximum bit-width in the circuit: {qmodel_c100.fhe_circuit.graph.maximum_integer_bit_width()}\"\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb index b76edd99e..e5bbda90d 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarInFheWithSmallerAccumulators.ipynb @@ -30,11 +30,10 @@ "import warnings\n", "from typing import Callable, List, Tuple\n", 
"\n", + "import concrete.compiler\n", "import matplotlib.pyplot as plt\n", "import torch\n", "from cifar_utils import fhe_simulation_inference, get_dataloader, torch_inference\n", - "from concrete.compiler import check_gpu_available\n", - "from concrete.fhe.compilation import Configuration\n", "from models import QuantVGG11\n", "from torch.utils.data.dataloader import DataLoader\n", "from torchvision import datasets\n", @@ -61,8 +60,26 @@ "seed = 42\n", "rounding_thresholds_bits = [8, 7, 6, 5, 3]\n", "\n", - "use_gpu_if_available = False\n", - "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"" + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Concrete ML also supports a CUDA-enabled backend. To set it up, follow the instructions in the official [guide](../../../docs/guides/using_gpu.md) for installing the GPU-enabled Concrete compiler." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "compilation_device = \"cuda\" if concrete.compiler.check_gpu_available() else \"cpu\"\n", + "\n", + "print(f\"Is GPU enabled: {concrete.compiler.check_gpu_enabled()}\")\n", + "print(f\"Is GPU available: {concrete.compiler.check_gpu_available()}\")" ] }, { @@ -94,7 +111,7 @@ " model.to(\"cpu\"),\n", " torch_inputset=X_train,\n", " rounding_threshold_bits=max_bitwidth,\n", - " configuration=Configuration(use_gpu=(device == \"cuda\")),\n", + " device=compilation_device,\n", " )\n", "\n", " acc_fhe_s = fhe_simulation_inference(qmodel, test_loader, True)\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb index 6e834d91f..12321a7dc 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb +++ 
b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb @@ -30,6 +30,7 @@ "metadata": {}, "outputs": [], "source": [ + "import concrete.compiler\n", "import torch\n", "from cifar_utils import (\n", " fhe_compatibility,\n", @@ -40,7 +41,6 @@ " torch_inference,\n", " train,\n", ")\n", - "from concrete.compiler import check_gpu_available\n", "\n", "# As we follow the same methodology for quantization aware training for CIFAR-10 and CIFAR-100.\n", "# Let's import some generic functions.\n", @@ -88,13 +88,32 @@ ], "source": [ "bit = 5\n", + "seed = 42\n", "\n", - "use_gpu_if_available = False\n", - "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "print(f\"Device Type: {device}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Concrete ML also supports a CUDA-enabled backend. To set it up, follow the instructions in the official [guide](../../../docs/guides/using_gpu.md) for installing the GPU-enabled Concrete compiler." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "compilation_device = \"cuda\" if concrete.compiler.check_gpu_available() else \"cpu\"\n", + "\n", + "print(f\"Is GPU enabled: {concrete.compiler.check_gpu_enabled()}\")\n", + "print(f\"Is GPU available: {concrete.compiler.check_gpu_available()}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -282,7 +301,7 @@ "\n", "data_calibration, _ = next(iter(train_loader_c100))\n", "\n", - "qmodel = fhe_compatibility(quant_vgg, data_calibration, device=device)\n", + "qmodel = fhe_compatibility(quant_vgg, data_calibration, device=compilation_device)\n", "\n", "print(\n", " f\"With {param_c100['dataset_name']}, the maximum bit-width in the circuit = \"\n", @@ -546,7 +565,7 @@ "# Check the FHE-compatibility.\n", "data, _ = next(iter(train_loader_c10))\n", "\n", - "qmodel = fhe_compatibility(quant_vgg, data, device=device)\n", + "qmodel = fhe_compatibility(quant_vgg, data, device=compilation_device)\n", "\n", "print(\n", " f\"With {param_c10['dataset_name']}, the circuit has a maximum bit-width of \"\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb index ed89359f2..fdbb8ed9f 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/FromImageNetToCifar.ipynb @@ -36,7 +36,6 @@ "\n", "import torch\n", "from cifar_utils import get_dataloader, plot_dataset, plot_history, torch_inference, train\n", - "from concrete.compiler import check_gpu_available\n", "from models import Fp32VGG11\n", "\n", "warnings.filterwarnings(\"ignore\")" @@ -65,8 +64,7 @@ "source": [ "dataset_name = \"CIFAR_100\"\n", "\n", - "use_gpu_if_available = False\n", - "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n", + "device = \"cuda\" if 
torch.cuda.is_available() else \"cpu\"\n", "\n", "param_c10 = {\n", " \"output_size\": 10,\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb index 746a19eb7..a54be16a5 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/PerrorImpactOnFMNIST.ipynb @@ -32,12 +32,11 @@ "from itertools import chain\n", "from time import time\n", "\n", + "import concrete.compiler\n", "import matplotlib.pylab as plt\n", "import numpy\n", "import torch\n", "from cifar_utils import get_dataloader, mapping_keys, plot_dataset, torch_inference, train\n", - "from concrete.compiler import check_gpu_available\n", - "from concrete.fhe.compilation import Configuration\n", "from sklearn.metrics import top_k_accuracy_score\n", "\n", "from concrete.ml.pytest.torch_models import QNNFashionMNIST\n", @@ -75,12 +74,30 @@ " \"seed\": 42,\n", "}\n", "\n", - "use_gpu_if_available = False\n", - "device = \"cuda\" if use_gpu_if_available and check_gpu_available() else \"cpu\"\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "print(f\"Device Type: {device}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Concrete ML also supports a CUDA-enabled backend. To set it up, follow the instructions in the official [guide](../../../docs/guides/using_gpu.md) for installing the GPU-enabled Concrete compiler." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "compilation_device = \"cuda\" if concrete.compiler.check_gpu_available() else \"cpu\"\n", + "\n", + "print(f\"Is GPU enabled: {concrete.compiler.check_gpu_enabled()}\")\n", + "print(f\"Is GPU available: {concrete.compiler.check_gpu_available()}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -251,7 +268,7 @@ "\n", " start_time = time()\n", " qmodel = compile_brevitas_qat_model(\n", " torch_model=quant_model,\n", " torch_inputset=X_calib,\n", " p_error=p_error,\n", - " configuration=Configuration(use_gpu=(device == \"cuda\")),\n", + " device=compilation_device,\n", " )\n", " compilation_time.append((time() - start_time) / 60.0)\n", "\n", @@ -359,7 +376,10 @@ "\n", "# Compile the model with the optimal `p_error`\n", "qmodel = compile_brevitas_qat_model(\n", - " torch_model=quant_model, torch_inputset=X_calib, p_error=largest_p_error\n", + " torch_model=quant_model,\n", + " torch_inputset=X_calib,\n", + " p_error=largest_p_error,\n", + " device=compilation_device,\n", ")\n", "\n", "# Key Generation\n", diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py b/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py index ac335af7d..b77d24345 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/cifar_utils.py @@ -445,7 +445,7 @@ def fhe_compatibility(model: Callable, data: DataLoader, device: str) -> Callabl Args: model (Callable): The Brevitas model. data (DataLoader): The data loader. - device (str): Specifies the device to run on, either 'cpu' or 'gpu'. + device (str): Specifies the device to use for compilation, either 'cpu' or 'cuda'. Returns: Callable: Quantized model. 
@@ -457,7 +457,7 @@ def fhe_compatibility(model: Callable, data: DataLoader, device: str) -> Callabl torch_inputset=data, show_mlir=False, output_onnx_file="test.onnx", - configuration=Configuration(use_gpu=(device == "cuda")), + device=device, ) return qmodel From b5e215d406e6d77a9380ba45901cdbad96c4dae1 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 24 Sep 2024 16:00:30 +0200 Subject: [PATCH 3/5] chore: add use_gpu to cifar training --- .../cifar/cifar_brevitas_training/evaluate_one_example_fhe.py | 4 ++++ .../cifar/cifar_brevitas_training/evaluate_torch_cml.py | 1 + 2 files changed, 5 insertions(+) diff --git a/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py b/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py index afec3d6a8..1aca13336 100644 --- a/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py +++ b/use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py @@ -6,6 +6,7 @@ from pathlib import Path import torch +from concrete.compiler import check_gpu_available from concrete.fhe import Exactness from concrete.fhe.compilation.configuration import Configuration from models import cnv_2w2a @@ -22,6 +23,8 @@ # observe a decrease in torch's top1 accuracy when using MPS devices # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3953 DEVICE = "cuda" if torch.cuda.is_available() else "cpu" +COMPILATION_DEVICE = "cuda" if check_gpu_available() else "cpu" + NUM_SAMPLES = int(os.environ.get("NUM_SAMPLES", 1)) P_ERROR = float(os.environ.get("P_ERROR", 0.01)) @@ -93,6 +96,7 @@ def wrapper(*args, **kwargs): configuration=configuration, rounding_threshold_bits={"method": Exactness.APPROXIMATE, "n_bits": 6}, p_error=P_ERROR, + device=COMPILATION_DEVICE, ) assert isinstance(quantized_numpy_module, QuantizedModule) diff --git a/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py b/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py 
index 1e2ceb5b7..5838eba00 100644 --- a/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py +++ b/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py @@ -123,6 +123,7 @@ def main(args): if rounding_threshold_bits is not None else None ), + device=COMPILATION_DEVICE, ) # Print max bit-width in the circuit From 46b831e20efe3c28d3483b1ab1a1bacee47a47d7 Mon Sep 17 00:00:00 2001 From: kcelia Date: Tue, 24 Sep 2024 16:00:52 +0200 Subject: [PATCH 4/5] chore: add use_gpu to resnet18 --- use_case_examples/resnet/run_resnet18_fhe.py | 25 +++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/use_case_examples/resnet/run_resnet18_fhe.py b/use_case_examples/resnet/run_resnet18_fhe.py index 9cdf54973..f04a6d5cd 100644 --- a/use_case_examples/resnet/run_resnet18_fhe.py +++ b/use_case_examples/resnet/run_resnet18_fhe.py @@ -3,6 +3,7 @@ import time from pathlib import Path +import concrete.compiler import matplotlib.pyplot as plt import numpy as np import torch @@ -53,7 +54,12 @@ def evaluate_model(model, processor): def compile_model( - model, images, n_bits, rounding_threshold_bits=None, fhe_mode="disable", use_gpu=False + model, + images, + n_bits, + rounding_threshold_bits=None, + fhe_mode="disable", + compilation_device="cpu", ): """ Compile the model using either build_quantized_module or compile_torch_model. @@ -70,7 +76,7 @@ def compile_model( } rounding_threshold_bits: The rounding threshold bits. fhe_mode: The FHE mode ('disable' or 'simulate'). - use_gpu: Whether to use GPU for compilation. + compilation_device: Whether to use GPU or CPU for compilation. Returns: The compiled quantized module. 
@@ -85,7 +91,7 @@ def compile_model( } if fhe_mode != "disable": - config = Configuration(enable_tlu_fusing=True, print_tlu_fusing=False, use_gpu=use_gpu) + config = Configuration(enable_tlu_fusing=True, print_tlu_fusing=False) compile_config.update( { "p_error": 0.05, @@ -97,7 +103,7 @@ def compile_model( compile_func = build_quantized_module print(f"Compiling the model with {compile_func.__name__}...") - return compile_func(model, torch_inputset=images, **compile_config) + return compile_func(model, torch_inputset=images, **compile_config, device=compilation_device) def export_statistics(q_module): @@ -270,7 +276,7 @@ def main(): "--export_statistics", action="store_true", help="Export the circuit statistics." ) parser.add_argument( - "--use_gpu", action="store_true", help="Use the available GPU at FHE runtime." + "--use_gpu", type=bool, action="store_true", help="Use the available GPU at FHE runtime." ) parser.add_argument( "--run_experiment", @@ -291,6 +297,11 @@ def main(): ) args = parser.parse_args() + if args.use_gpu and not concrete.compiler.check_gpu_available(): + print("Follow the GPU setup guide to install the GPU-enabled Concrete ML compiler.") + print("GPU Enabled:", concrete.compiler.check_gpu_enabled()) + print("GPU Available:", concrete.compiler.check_gpu_available()) + resnet18 = load_model() processor = ImageNetProcessor( NUM_TEST_SAMPLES, CALIBRATION_SAMPLES, cache_dir=args.dataset_cache_dir @@ -309,7 +320,9 @@ def main(): n_bits={"model_inputs": 8, "op_inputs": 7, "op_weights": 7, "model_outputs": 9}, rounding_threshold_bits=7, fhe_mode="simulate", - use_gpu=args.use_gpu, + compilation_device=( + "cuda" if args.use_gpu and concrete.compiler.check_gpu_available() else "cpu" + ), ) if args.export_statistics: From e4934c867135dea9adb6d30ec1c80d0a6bf4e98f Mon Sep 17 00:00:00 2001 From: kcelia Date: Wed, 25 Sep 2024 14:26:11 +0200 Subject: [PATCH 5/5] chore: update --- .../CifarQuantizationAwareTraining.ipynb | 1 - 
.../cifar_brevitas_training/evaluate_torch_cml.py | 11 +++++++++-- use_case_examples/resnet/run_resnet18_fhe.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb index 12321a7dc..42702b6c7 100644 --- a/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb +++ b/use_case_examples/cifar/cifar_brevitas_finetuning/CifarQuantizationAwareTraining.ipynb @@ -88,7 +88,6 @@ ], "source": [ "bit = 5\n", - "seed = 42\n", "\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", diff --git a/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py b/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py index 5838eba00..04c7dce52 100644 --- a/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py +++ b/use_case_examples/cifar/cifar_brevitas_training/evaluate_torch_cml.py @@ -1,6 +1,7 @@ import argparse from pathlib import Path +import concrete.compiler import numpy as np import torch from concrete.fhe import Configuration @@ -74,8 +75,14 @@ def main(args): # observe a decrease in torch's top1 accuracy when using MPS devices # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3953 device = "cuda" if torch.cuda.is_available() else "cpu" + compilation_device = "cuda" if concrete.compiler.check_gpu_available() else "cpu" - print("Device in use:", device) + print("Torch device in use:", device) + print( + "To leverage the CUDA backend, follow the GPU setup guide to install the Concrete ML compiler." 
+ ) + print("GPU Enabled:", concrete.compiler.check_gpu_enabled()) + print("GPU Available:", concrete.compiler.check_gpu_available()) # Find relative path to this file dir_path = Path(__file__).parent.absolute() @@ -123,7 +130,7 @@ def main(args): if rounding_threshold_bits is not None else None ), - device=COMPILATION_DEVICE, + device=compilation_device, ) # Print max bit-width in the circuit diff --git a/use_case_examples/resnet/run_resnet18_fhe.py b/use_case_examples/resnet/run_resnet18_fhe.py index f04a6d5cd..2e7a3a74d 100644 --- a/use_case_examples/resnet/run_resnet18_fhe.py +++ b/use_case_examples/resnet/run_resnet18_fhe.py @@ -276,7 +276,7 @@ def main(): "--export_statistics", action="store_true", help="Export the circuit statistics." ) parser.add_argument( - "--use_gpu", type=bool, action="store_true", help="Use the available GPU at FHE runtime." + "--use_gpu", action="store_true", help="Use the available GPU at FHE runtime." ) parser.add_argument( "--run_experiment",