diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx
index 747e1396fb..7376061d14 100644
--- a/docs/source/exporters/onnx/overview.mdx
+++ b/docs/source/exporters/onnx/overview.mdx
@@ -85,6 +85,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
 - ResNet
 - Roberta
 - Roformer
+- RT-DETR
 - SAM
 - Segformer
 - SEW
diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index e23716d4b7..4d9290afea 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -787,6 +787,53 @@ def outputs(self) -> Dict[str, Dict[int, str]]:
         return super().outputs


+class RTDetrDummyInputGenerator(DummyVisionInputGenerator):
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedVisionConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        num_channels: int = DEFAULT_DUMMY_SHAPES["num_channels"],
+        width: int = DEFAULT_DUMMY_SHAPES["width"],
+        height: int = DEFAULT_DUMMY_SHAPES["height"],
+        **kwargs,
+    ):
+        super().__init__(
+            task=task,
+            normalized_config=normalized_config,
+            batch_size=batch_size,
+            num_channels=num_channels,
+            width=width,
+            height=height,
+            **kwargs,
+        )
+
+        from transformers.onnx.utils import get_preprocessor
+
+        preprocessor = get_preprocessor(normalized_config._name_or_path)
+        if preprocessor is not None and hasattr(preprocessor, "size"):
+            self.height = preprocessor.size.get("height", self.height)
+            self.width = preprocessor.size.get("width", self.width)
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        input_ = super().generate(
+            input_name=input_name, framework=framework, int_dtype=int_dtype, float_dtype=float_dtype
+        )
+        return input_
+
+
+class RTDetrOnnxConfig(ViTOnnxConfig):
+    # OPSET=16 required. Otherwise we get the following error:
+    # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::grid_sampler' to ONNX opset version 12 is not supported. Support for this operator was added in version 16, try exporting with this version.
+    DEFAULT_ONNX_OPSET = 16
+    DUMMY_INPUT_GENERATOR_CLASSES = (RTDetrDummyInputGenerator,)
+    ATOL_FOR_VALIDATION = 1e-3
+
+    @property
+    def inputs(self) -> Dict[str, Dict[int, str]]:
+        return {"pixel_values": {0: "batch_size"}}
+
+
 class TableTransformerOnnxConfig(DetrOnnxConfig):
     pass

diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
index 2896842f93..b14297711c 100644
--- a/optimum/exporters/tasks.py
+++ b/optimum/exporters/tasks.py
@@ -951,6 +951,11 @@ class TasksManager:
             onnx="RoFormerOnnxConfig",
             tflite="RoFormerTFLiteConfig",
         ),
+        "rt-detr": supported_tasks_mapping(
+            "feature-extraction",
+            "object-detection",
+            onnx="RTDetrOnnxConfig",
+        ),
         "sam": supported_tasks_mapping(
             "feature-extraction",
             onnx="SamOnnxConfig",
diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py
index 81207b7649..085651dd43 100644
--- a/optimum/utils/normalized_config.py
+++ b/optimum/utils/normalized_config.py
@@ -216,6 +216,7 @@ class NormalizedConfigManager:
         'owlvit',
         'perceiver',
         'roformer',
+        'rt-detr',
         'squeezebert',
         'table-transformer',
     """
diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index 0c52754ff6..7f2662c0c2 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -260,6 +260,7 @@
     "resnet": "microsoft/resnet-50",
     "roberta": "roberta-base",
     "roformer": "junnyu/roformer_chinese_base",
+    "rt-detr": "PekingU/rtdetr_r50vd",
     "sam": "facebook/sam-vit-base",
     "segformer": "nvidia/segformer-b0-finetuned-ade-512-512",
     "splinter": "hf-internal-testing/tiny-random-SplinterModel",
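
For reviewers, a minimal usage sketch of the export path this patch enables (not part of the diff itself). It assumes `main_export` from `optimum.exporters.onnx` and reuses the `PekingU/rtdetr_r50vd` checkpoint referenced in `tests/exporters/exporters_utils.py`; the output directory name is illustrative.

```python
# Sketch: export RT-DETR to ONNX through the new RTDetrOnnxConfig, then load the
# result with ONNX Runtime as a smoke test. Paths are illustrative.
from optimum.exporters.onnx import main_export

main_export(
    "PekingU/rtdetr_r50vd",   # checkpoint used in the exporter tests above
    output="rtdetr_onnx",     # directory where model.onnx is written
    task="object-detection",  # one of the tasks registered for "rt-detr" in TasksManager
    opset=16,                 # DEFAULT_ONNX_OPSET: aten::grid_sampler needs opset >= 16
)

import onnxruntime as ort

session = ort.InferenceSession("rtdetr_onnx/model.onnx")
print([inp.name for inp in session.get_inputs()])  # expected to contain "pixel_values"
```

The CLI equivalent should be `optimum-cli export onnx --model PekingU/rtdetr_r50vd --task object-detection rtdetr_onnx/`.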