huggingface · xenova · Jun 28, 2024 · Jun 28, 2024 · Jun 28, 2024 · merveenoyan
diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx
@@ -85,6 +85,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
 - ResNet
 - Roberta
 - Roformer
+- RT-DETR
 - SAM
 - Segformer
 - SEW

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
@@ -787,6 +787,69 @@ def outputs(self) -> Dict[str, Dict[int, str]]:
  return super().outputs
 
 
+class RTDetrDummyInputGenerator(DummyVisionInputGenerator):
+    """
+    Dummy vision input generator for RT-DETR.
+
+    All constructor arguments are forwarded unchanged to the parent constructor. Afterwards,
+    `normalized_config._name_or_path` is used to look up a preprocessor; when that preprocessor has a `size`
+    exposing `.get`, its `"height"` and `"width"` entries replace `self.height` and `self.width`.
+    """
+
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedVisionConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        num_channels: int = DEFAULT_DUMMY_SHAPES["num_channels"],
+        width: int = DEFAULT_DUMMY_SHAPES["width"],
+        height: int = DEFAULT_DUMMY_SHAPES["height"],
+        **kwargs,
+    ):
+        super().__init__(
+            task=task,
+            normalized_config=normalized_config,
+            batch_size=batch_size,
+            num_channels=num_channels,
+            width=width,
+            height=height,
+            **kwargs,
+        )
+
+        from transformers.onnx.utils import get_preprocessor
+
+        preprocessor = get_preprocessor(normalized_config._name_or_path)
+        # Only a `size` that exposes `.get` (e.g. a dict) can be queried. Any other form would make `.get` raise
+        # AttributeError, so in that case the current `self.height` / `self.width` are kept.
+        size = getattr(preprocessor, "size", None)
+        if hasattr(size, "get"):
+            self.height = size.get("height", self.height)
+            self.width = size.get("width", self.width)
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        """Forward all arguments to the parent implementation and return its result unchanged."""
+        return super().generate(
+            input_name=input_name, framework=framework, int_dtype=int_dtype, float_dtype=float_dtype
+        )
+
+
+class RTDetrOnnxConfig(ViTOnnxConfig):
+    """ONNX export configuration for RT-DETR, using `RTDetrDummyInputGenerator` for its dummy inputs."""
+
+    # OPSET=16 required. Otherwise we get the following error:
+    # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::grid_sampler' to ONNX opset
+    # version 12 is not supported. Support for this operator was added in version 16, try exporting with
+    # this version.
+    DEFAULT_ONNX_OPSET = 16
+    DUMMY_INPUT_GENERATOR_CLASSES = (RTDetrDummyInputGenerator,)
+    ATOL_FOR_VALIDATION = 1e-3
+
+    @property
+    def inputs(self) -> Dict[str, Dict[int, str]]:
+        """Declare `pixel_values` as the only input, with axis 0 named `batch_size`."""
+        return {"pixel_values": {0: "batch_size"}}
+
+
 class TableTransformerOnnxConfig(DetrOnnxConfig):
  pass
 

diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
@@ -951,6 +951,11 @@ class TasksManager:
  onnx="RoFormerOnnxConfig",
  tflite="RoFormerTFLiteConfig",
  ),
+ "rt-detr": supported_tasks_mapping(
+ "feature-extraction",
+ "object-detection",
+ onnx="RTDetrOnnxConfig",
+ ),
  "sam": supported_tasks_mapping(
  "feature-extraction",
  onnx="SamOnnxConfig",

diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py
@@ -216,6 +216,7 @@ class NormalizedConfigManager:
  'owlvit',
  'perceiver',
  'roformer',
+ 'rt-detr',
  'squeezebert',
  'table-transformer',
  """

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
@@ -260,6 +260,7 @@
  "resnet": "microsoft/resnet-50",
  "roberta": "roberta-base",
  "roformer": "junnyu/roformer_chinese_base",
+ "rt-detr": "PekingU/rtdetr_r50vd",
  "sam": "facebook/sam-vit-base",
  "segformer": "nvidia/segformer-b0-finetuned-ade-512-512",
  "splinter": "hf-internal-testing/tiny-random-SplinterModel",