Add top-3 HF presets

pkgoogle · pkgoogle · commit e9e364557871 · 2025-02-19T15:06:46.000-08:00
diff --git a/keras_hub/src/models/mobilenet/mobilenet_backbone.py b/keras_hub/src/models/mobilenet/mobilenet_backbone.py
@@ -142,6 +142,8 @@ class DepthwiseConvBlock(keras.layers.Layer):
             signal into before reexciting back out. If (>1) technically, it's an
             excite & squeeze layer. If this doesn't exist there is no
             SqueezeExcite layer.
+        residual: bool, default False. If True, the block's input is added to
+            its output (a residual connection). If False, no addition is done.
         name: str, name of the layer
         dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
             to use for the model's computations and weights.
@@ -161,6 +163,7 @@ def __init__(
         kernel_size=3,
         stride=2,
         squeeze_excite_ratio=None,
+        residual=False,
         name=None,
         dtype=None,
         **kwargs,
@@ -171,6 +174,7 @@ def __init__(
         self.kernel_size = kernel_size
         self.stride = stride
         self.squeeze_excite_ratio = squeeze_excite_ratio
+        self.residual = residual
         self.name = name
 
         channel_axis = (
@@ -256,11 +260,15 @@ def call(self, inputs):
         x = self.batch_normalization1(x)
         x = self.activation1(x)
 
-        if self.se_layer:
+        if self.squeeze_excite_ratio:
             x = self.se_layer(x)
 
         x = self.conv2(x)
         x = self.batch_normalization2(x)
+
+        if self.residual:
+            x = x + inputs
+
         return x
 
     def get_config(self):
@@ -272,6 +280,7 @@ def get_config(self):
                 "kernel_size": self.kernel_size,
                 "stride": self.stride,
                 "squeeze_excite_ratio": self.squeeze_excite_ratio,
+                "residual": self.residual,
                 "name": self.name,
             }
         )
@@ -675,6 +684,8 @@ def __init__(
         stackwise_padding,
         output_num_filters,
         depthwise_filters,
+        depthwise_stride,
+        depthwise_residual,
         last_layer_filter,
         squeeze_and_excite=None,
         image_shape=(None, None, 3),
@@ -722,7 +733,9 @@ def __init__(
         x = DepthwiseConvBlock(
             input_num_filters,
             depthwise_filters,
+            stride=depthwise_stride,
             squeeze_excite_ratio=squeeze_and_excite,
+            residual=depthwise_residual,
             name="block_0",
             dtype=dtype,
         )(x)
@@ -768,6 +781,8 @@ def __init__(
         self.input_num_filters = input_num_filters
         self.output_num_filters = output_num_filters
         self.depthwise_filters = depthwise_filters
+        self.depthwise_stride = depthwise_stride
+        self.depthwise_residual = depthwise_residual
         self.last_layer_filter = last_layer_filter
         self.squeeze_and_excite = squeeze_and_excite
         self.input_activation = input_activation
@@ -790,6 +805,8 @@ def get_config(self):
                 "input_num_filters": self.input_num_filters,
                 "output_num_filters": self.output_num_filters,
                 "depthwise_filters": self.depthwise_filters,
+                "depthwise_stride": self.depthwise_stride,
+                "depthwise_residual": self.depthwise_residual,
                 "last_layer_filter": self.last_layer_filter,
                 "squeeze_and_excite": self.squeeze_and_excite,
                 "input_activation": self.input_activation,
diff --git a/keras_hub/src/models/mobilenet/mobilenet_image_classifier.py b/keras_hub/src/models/mobilenet/mobilenet_image_classifier.py
@@ -18,6 +18,7 @@ def __init__(
         self,
         backbone,
         num_classes,
+        num_features=1024,
         preprocessor=None,
         head_dtype=None,
         **kwargs,
@@ -33,7 +34,7 @@ def __init__(
         )
 
         self.output_conv = keras.layers.Conv2D(
-            filters=1024,
+            filters=num_features,
             kernel_size=(1, 1),
             strides=(1, 1),
             use_bias=True,
diff --git a/keras_hub/src/utils/preset_utils.py b/keras_hub/src/utils/preset_utils.py
@@ -622,6 +622,9 @@ def load_task(self, cls, load_weights, load_task_weights, **kwargs):
             kwargs["preprocessor"] = self.load_preprocessor(
                 cls.preprocessor_cls,
             )
+        if "num_features" not in kwargs and "num_features" in self.config:
+            kwargs["num_features"] = self.config["num_features"]
+
         return cls(**kwargs)
 
     def load_preprocessor(
diff --git a/keras_hub/src/utils/timm/convert_mobilenet.py b/keras_hub/src/utils/timm/convert_mobilenet.py
@@ -8,64 +8,135 @@
 def convert_backbone_config(timm_config):
     timm_architecture = timm_config["architecture"]
 
-    if "mobilenetv3_" in timm_architecture:
-        input_activation = "hard_swish"
-        output_activation = "hard_swish"
-    else:
-        input_activation = "relu6"
-        output_activation = "relu6"
-
-    if timm_architecture == "mobilenetv3_small_050":
-        stackwise_num_blocks = [2, 3, 2, 3]
-        stackwise_expansion = [
+    kwargs = {
+        "stackwise_num_blocks": [2, 3, 2, 3],
+        "stackwise_expansion": [
             [40, 56],
             [64, 144, 144],
             [72, 72],
             [144, 288, 288],
-        ]
-        stackwise_num_filters = [[16, 16], [24, 24, 24], [24, 24], [48, 48, 48]]
-        stackwise_kernel_size = [[3, 3], [5, 5, 5], [5, 5], [5, 5, 5]]
-        stackwise_num_strides = [[2, 1], [2, 1, 1], [1, 1], [2, 1, 1]]
-        stackwise_se_ratio = [
+        ],
+        "stackwise_num_filters": [
+            [16, 16],
+            [24, 24, 24],
+            [24, 24],
+            [48, 48, 48],
+        ],
+        "stackwise_kernel_size": [[3, 3], [5, 5, 5], [5, 5], [5, 5, 5]],
+        "stackwise_num_strides": [[2, 1], [2, 1, 1], [1, 1], [2, 1, 1]],
+        "stackwise_se_ratio": [
             [None, None],
             [0.25, 0.25, 0.25],
             [0.25, 0.25],
             [0.25, 0.25, 0.25],
-        ]
-        stackwise_activation = [
+        ],
+        "stackwise_activation": [
             ["relu", "relu"],
             ["hard_swish", "hard_swish", "hard_swish"],
             ["hard_swish", "hard_swish"],
             ["hard_swish", "hard_swish", "hard_swish"],
-        ]
-        stackwise_padding = [[1, 1], [2, 2, 2], [2, 2], [2, 2, 2]]
-        output_num_filters = 1024
-        input_num_filters = 16
-        depthwise_filters = 8
-        squeeze_and_excite = 0.5
-        last_layer_filter = 288
+        ],
+        "stackwise_padding": [[1, 1], [2, 2, 2], [2, 2], [2, 2, 2]],
+        "output_num_filters": 1024,
+        "input_num_filters": 16,
+        "depthwise_filters": 8,
+        "depthwise_stride": 2,
+        "depthwise_residual": False,
+        "squeeze_and_excite": 0.5,
+        "last_layer_filter": 288,
+        "input_activation": "relu6",
+        "output_activation": "relu6",
+    }
+
+    if "mobilenetv3_" in timm_architecture:
+        kwargs["input_activation"] = "hard_swish"
+        kwargs["output_activation"] = "hard_swish"
+
+    if timm_architecture == "mobilenetv3_small_050":
+        pass
+    elif timm_architecture == "mobilenetv3_small_100":
+        modified_kwargs = {
+            "stackwise_expansion": [
+                [72, 88],
+                [96, 240, 240],
+                [120, 144],
+                [288, 576, 576],
+            ],
+            "stackwise_num_filters": [
+                [24, 24],
+                [40, 40, 40],
+                [48, 48],
+                [96, 96, 96],
+            ],
+            "depthwise_filters": 16,
+            "last_layer_filter": 576,
+        }
+        kwargs.update(modified_kwargs)
+    elif timm_architecture.startswith("mobilenetv3_large_100"):
+        modified_kwargs = {
+            "stackwise_num_blocks": [2, 3, 4, 2, 3],
+            "stackwise_expansion": [
+                [64, 72],
+                [72, 120, 120],
+                [240, 200, 184, 184],
+                [480, 672],
+                [672, 960, 960],
+            ],
+            "stackwise_num_filters": [
+                [24, 24],
+                [40, 40, 40],
+                [80, 80, 80, 80],
+                [112, 112],
+                [160, 160, 160],
+            ],
+            "stackwise_kernel_size": [
+                [3, 3],
+                [5, 5, 5],
+                [3, 3, 3, 3],
+                [3, 3],
+                [5, 5, 5],
+            ],
+            "stackwise_num_strides": [
+                [2, 1],
+                [2, 1, 1],
+                [2, 1, 1, 1],
+                [1, 1],
+                [2, 1, 1],
+            ],
+            "stackwise_se_ratio": [
+                [None, None],
+                [0.25, 0.25, 0.25],
+                [None, None, None, None],
+                [0.25, 0.25],
+                [0.25, 0.25, 0.25],
+            ],
+            "stackwise_activation": [
+                ["relu", "relu"],
+                ["relu", "relu", "relu"],
+                ["hard_swish", "hard_swish", "hard_swish", "hard_swish"],
+                ["hard_swish", "hard_swish"],
+                ["hard_swish", "hard_swish", "hard_swish"],
+            ],
+            "stackwise_padding": [
+                [1, 1],
+                [2, 2, 2],
+                [1, 1, 1, 1],
+                [1, 1],
+                [2, 2, 2],
+            ],
+            "depthwise_filters": 16,
+            "depthwise_stride": 1,
+            "depthwise_residual": True,
+            "squeeze_and_excite": None,
+            "last_layer_filter": 960,
+        }
+        kwargs.update(modified_kwargs)
     else:
         raise ValueError(
             f"Currently, the architecture {timm_architecture} is not supported."
         )
 
-    return dict(
-        input_num_filters=input_num_filters,
-        input_activation=input_activation,
-        depthwise_filters=depthwise_filters,
-        squeeze_and_excite=squeeze_and_excite,
-        stackwise_num_blocks=stackwise_num_blocks,
-        stackwise_expansion=stackwise_expansion,
-        stackwise_num_filters=stackwise_num_filters,
-        stackwise_kernel_size=stackwise_kernel_size,
-        stackwise_num_strides=stackwise_num_strides,
-        stackwise_se_ratio=stackwise_se_ratio,
-        stackwise_activation=stackwise_activation,
-        stackwise_padding=stackwise_padding,
-        output_num_filters=output_num_filters,
-        output_activation=output_activation,
-        last_layer_filter=last_layer_filter,
-    )
+    return kwargs
 
 
 def convert_weights(backbone, loader, timm_config):
@@ -120,9 +191,14 @@ def port_batch_normalization(keras_layer, hf_weight_prefix):
     port_conv2d(stem_block.conv1, f"{hf_name}.conv_dw")
     port_batch_normalization(stem_block.batch_normalization1, f"{hf_name}.bn1")
 
-    stem_se_block = stem_block.se_layer
-    port_conv2d(stem_se_block.conv_reduce, f"{hf_name}.se.conv_reduce", True)
-    port_conv2d(stem_se_block.conv_expand, f"{hf_name}.se.conv_expand", True)
+    if stem_block.squeeze_excite_ratio:
+        stem_se_block = stem_block.se_layer
+        port_conv2d(
+            stem_se_block.conv_reduce, f"{hf_name}.se.conv_reduce", True
+        )
+        port_conv2d(
+            stem_se_block.conv_expand, f"{hf_name}.se.conv_expand", True
+        )
 
     port_conv2d(stem_block.conv2, f"{hf_name}.conv_pw")
     port_batch_normalization(stem_block.batch_normalization2, f"{hf_name}.bn2")
diff --git a/tools/checkpoint_conversion/convert_mobilenet_checkpoints.py b/tools/checkpoint_conversion/convert_mobilenet_checkpoints.py
@@ -1,7 +1,13 @@
 """Convert mobilenet checkpoints.
 
 python tools/checkpoint_conversion/convert_mobilenet_checkpoints.py \
-    --preset mobilenetv3_small_050 --upload_uri kaggle://alexbutcher/mobilenet/keras/mobilenetv3_small_050
+    --preset mobilenetv3_small_050 --upload_uri kaggle://keras/mobilenetv3/keras/mobilenet_v3_small_050_imagenet/1
+python tools/checkpoint_conversion/convert_mobilenet_checkpoints.py \
+    --preset mobilenetv3_small_100 --upload_uri kaggle://keras/mobilenetv3/keras/mobilenet_v3_small_100_imagenet/1
+python tools/checkpoint_conversion/convert_mobilenet_checkpoints.py \
+    --preset mobilenetv3_large_100.ra_in1k --upload_uri kaggle://keras/mobilenetv3/keras/mobilenet_v3_large_100_imagenet/1
+python tools/checkpoint_conversion/convert_mobilenet_checkpoints.py \
+    --preset mobilenetv3_large_100.miil_in21k_ft_in1k --upload_uri kaggle://keras/mobilenetv3/keras/mobilenet_v3_large_100_imagenet_21k/1
 """
 
 import os
@@ -19,6 +25,9 @@
 
 PRESET_MAP = {
     "mobilenetv3_small_050": "timm/mobilenetv3_small_050.lamb_in1k",
+    "mobilenetv3_small_100": "timm/mobilenetv3_small_100.lamb_in1k",
+    "mobilenetv3_large_100.ra_in1k": "timm/mobilenetv3_large_100.ra_in1k",
+    "mobilenetv3_large_100.miil_in21k_ft_in1k": "timm/mobilenetv3_large_100.miil_in21k_ft_in1k",  # noqa: E501
 }
 FLAGS = flags.FLAGS
 

Original file line number	Diff line number	Diff line change
`@@ -622,6 +622,9 @@ def load_task(self, cls, load_weights, load_task_weights, **kwargs):`
`622`	`622`	`             kwargs["preprocessor"] = self.load_preprocessor(`
`623`	`623`	`                 cls.preprocessor_cls,`
`624`	`624`	`             )`
	`625`	`+        if "num_features" not in kwargs and "num_features" in self.config:`
	`626`	`+            kwargs["num_features"] = self.config["num_features"]`
	`627`	`+`
`625`	`628`	`         return cls(**kwargs)`
`626`	`629`
`627`	`630`	`     def load_preprocessor(`