From 6f76560435d85a5231e9b2f9d24dd5daae46fac7 Mon Sep 17 00:00:00 2001
From: ankandrew <61120139+ankandrew@users.noreply.github.com>
Date: Fri, 10 May 2024 19:00:21 -0300
Subject: [PATCH] Add new Argentinian model trained with more (synthetic) data
---
README.md | 7 ++++---
docs/index.md | 7 ++++---
fast_plate_ocr/inference/hub.py | 13 ++++++++++---
fast_plate_ocr/inference/onnx_inference.py | 18 +++++++++++-------
4 files changed, 29 insertions(+), 16 deletions(-)
diff --git a/README.md b/README.md
index 9015c0b..b195047 100644
--- a/README.md
+++ b/README.md
@@ -31,9 +31,10 @@ The idea is to use this after a plate object detector, since the OCR expects the
### Available Models
-| Model Name | Time b=1 (ms)[1] | Throughput (plates/second)[1] | Dataset | Accuracy[2] | Dataset |
-|:----------------------------:|:--------------------------------:|:----------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------:|:----------------------:|:---------------------------------:|
-| argentinian-plates-cnn-model | 2.0964 | 477 | [arg_plate_dataset.zip](https://github.com/ankandrew/fast-plate-ocr/releases/download/arg-plates/arg_plate_dataset.zip) | 94.05% | Non-synthetic, plates up to 2020. |
+| Model Name | Time b=1 (ms)[1] | Throughput (plates/second)[1] | Dataset | Accuracy[2] | Dataset |
+|:------------------------------------:|:--------------------------------:|:----------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------:|:----------------------:|:-----------------------------------------------:|
+| `argentinian-plates-cnn-model` | 2.1 | 476 | [arg_plate_dataset.zip](https://github.com/ankandrew/fast-plate-ocr/releases/download/arg-plates/arg_plate_dataset.zip) | 94.05% | Non-synthetic, plates up to 2020. |
+| `argentinian-plates-cnn-synth-model` | 2.1 | 476 | [arg_plate_dataset.zip](https://github.com/ankandrew/fast-plate-ocr/releases/download/arg-plates/arg_plate_dataset_plus_synth.zip) | 94.19% | Plates up to 2020 + Bike & car synthetic plates |
_[1] Inference on Mac M1 chip using CPUExecutionProvider. Utilizing CoreMLExecutionProvider accelerates speed
by 5x._
diff --git a/docs/index.md b/docs/index.md
index 10fd67d..e780621 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -20,9 +20,10 @@ The idea is to use this after a plate object detector, since the OCR expects the
We currently have the following available models:
-| Model Name | Time b=1 (ms)[1] | Throughput (plates/second)[1] | Dataset | Accuracy[2] | Dataset |
-|:------------------------------:|:--------------------------------:|:----------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------:|:----------------------:|:---------------------------------:|
-| `argentinian-plates-cnn-model` | 2.0964 | 477 | [arg_plate_dataset.zip](https://github.com/ankandrew/fast-plate-ocr/releases/download/arg-plates/arg_plate_dataset.zip) | 94.05% | Non-synthetic, plates up to 2020. |
+| Model Name | Time b=1 (ms)[1] | Throughput (plates/second)[1] | Dataset | Accuracy[2] | Dataset |
+|:------------------------------------:|:--------------------------------:|:----------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------:|:----------------------:|:-----------------------------------------------:|
+| `argentinian-plates-cnn-model` | 2.1 | 476 | [arg_plate_dataset.zip](https://github.com/ankandrew/fast-plate-ocr/releases/download/arg-plates/arg_plate_dataset.zip) | 94.05% | Non-synthetic, plates up to 2020. |
+| `argentinian-plates-cnn-synth-model` | 2.1 | 476 | [arg_plate_dataset.zip](https://github.com/ankandrew/fast-plate-ocr/releases/download/arg-plates/arg_plate_dataset_plus_synth.zip) | 94.19% | Plates up to 2020 + Bike & car synthetic plates |
_[1] Inference on Mac M1 chip using CPUExecutionProvider. Utilizing CoreMLExecutionProvider accelerates speed
by 5x._
diff --git a/fast_plate_ocr/inference/hub.py b/fast_plate_ocr/inference/hub.py
index d8d7c47..de528b9 100644
--- a/fast_plate_ocr/inference/hub.py
+++ b/fast_plate_ocr/inference/hub.py
@@ -7,18 +7,25 @@
import shutil
import urllib.request
from http import HTTPStatus
+from typing import Literal
from tqdm.asyncio import tqdm
from fast_plate_ocr.inference.utils import safe_write
BASE_URL: str = "https://github.com/ankandrew/cnn-ocr-lp/releases/download"
+OcrModel = Literal["argentinian-plates-cnn-model", "argentinian-plates-cnn-synth-model"]
-AVAILABLE_ONNX_MODELS: dict[str, tuple[str, str]] = {
+
+AVAILABLE_ONNX_MODELS: dict[OcrModel, tuple[str, str]] = {
"argentinian-plates-cnn-model": (
f"{BASE_URL}/arg-plates/arg_cnn_ocr.onnx",
f"{BASE_URL}/arg-plates/arg_cnn_ocr_config.yaml",
- )
+ ),
+ "argentinian-plates-cnn-synth-model": (
+ f"{BASE_URL}/arg-plates/arg_cnn_ocr_synth.onnx",
+ f"{BASE_URL}/arg-plates/arg_cnn_ocr_config.yaml",
+ ),
}
"""Available ONNX models for doing inference."""
@@ -45,7 +52,7 @@ def _download_with_progress(url: str, filename: pathlib.Path) -> None:
def download_model(
- model_name: str,
+ model_name: OcrModel,
save_directory: pathlib.Path | None = None,
force_download: bool = False,
) -> tuple[pathlib.Path, pathlib.Path]:
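For context, a minimal sketch of how the widened hub API could be called once this patch is applied; the save directory below is illustrative and not part of the change:

```python
import pathlib

from fast_plate_ocr.inference import hub

# Fetch the new synthetic-data model; paths point into an illustrative cache dir.
model_path, config_path = hub.download_model(
    model_name="argentinian-plates-cnn-synth-model",
    save_directory=pathlib.Path("models"),
    force_download=False,  # set True to re-download even if the files already exist
)
print(model_path, config_path)
```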
diff --git a/fast_plate_ocr/inference/onnx_inference.py b/fast_plate_ocr/inference/onnx_inference.py
index 80878b6..82cf816 100644
--- a/fast_plate_ocr/inference/onnx_inference.py
+++ b/fast_plate_ocr/inference/onnx_inference.py
@@ -16,6 +16,7 @@
from fast_plate_ocr.common.utils import measure_time
from fast_plate_ocr.inference import hub
from fast_plate_ocr.inference.config import load_config_from_yaml
+from fast_plate_ocr.inference.hub import OcrModel
from fast_plate_ocr.inference.process import postprocess_output, preprocess_image, read_plate_image
@@ -62,11 +63,12 @@ class ONNXPlateRecognizer:
def __init__(
self,
- hub_ocr_model: Literal["argentinian-plates-cnn-model"] | None = None,
- device: Literal["gpu", "cpu", "auto"] = "auto",
+ hub_ocr_model: OcrModel | None = None,
+ device: Literal["cuda", "cpu", "auto"] = "auto",
sess_options: ort.SessionOptions | None = None,
model_path: str | os.PathLike[str] | None = None,
config_path: str | os.PathLike[str] | None = None,
+ force_download: bool = False,
) -> None:
"""
Initializes the ONNXPlateRecognizer with the specified OCR model and inference device.
@@ -77,26 +79,26 @@ def __init__(
Args:
hub_ocr_model: Name of the OCR model to use from the HUB.
- device: Device type for inference. Should be one of ('cpu', 'gpu', 'auto'). If
+ device: Device type for inference. Should be one of ('cpu', 'cuda', 'auto'). If
'auto' mode, the device will be deduced from
`onnxruntime.get_available_providers()`.
sess_options: Advanced session options for ONNX Runtime.
model_path: Path to ONNX model file to use (In case you want to use a custom one).
config_path: Path to config file to use (In case you want to use a custom one).
-
+ force_download: Force the model download, even if it already exists.
Returns:
None.
"""
self.logger = logging.getLogger(__name__)
- if device == "gpu":
+ if device == "cuda":
self.provider = ["CUDAExecutionProvider"]
elif device == "cpu":
self.provider = ["CPUExecutionProvider"]
elif device == "auto":
self.provider = ort.get_available_providers()
else:
- raise ValueError(f"Device should be one of ('cpu', 'gpu', 'auto'). Got '{device}'.")
+ raise ValueError(f"Device should be one of ('cpu', 'cuda', 'auto'). Got '{device}'.")
if model_path and config_path:
model_path = pathlib.Path(model_path)
@@ -106,7 +108,9 @@ def __init__(
self.model_name = model_path.stem
elif hub_ocr_model:
self.model_name = hub_ocr_model
- model_path, config_path = hub.download_model(model_name=hub_ocr_model)
+ model_path, config_path = hub.download_model(
+ model_name=hub_ocr_model, force_download=force_download
+ )
else:
raise ValueError(
"Either provide a model from the HUB or a custom model_path and config_path"