Commit 2cc5a5e: Add tf code for efficientformer

D-Roberts committed May 13, 2023
1 parent cf11493 commit 2cc5a5e
Showing 12 changed files with 1,658 additions and 9 deletions.
2 changes: 1 addition & 1 deletion docs/source/en/index.mdx
@@ -311,7 +311,7 @@ Flax), PyTorch, and/or TensorFlow.
| DonutSwin | | | | | |
| DPR | | | | | |
| DPT | | | | | |
| EfficientFormer | ❌ | ❌ | ✅ | ❌ | ❌ |
| EfficientFormer | ❌ | ❌ | ✅ | ✅ | ❌ |
| EfficientNet | | | | | |
| ELECTRA | | | | | |
| Encoder decoder | | | | | |
17 changes: 16 additions & 1 deletion docs/source/en/model_doc/efficientformer.mdx
Original file line number Diff line number Diff line change
@@ -37,7 +37,7 @@ EfficientFormer-L7, obtains 83.3% accuracy with only 7.0 ms latency. Our work pr
reach extremely low latency on mobile devices while maintaining high performance.*

This model was contributed by [novice03](https://huggingface.co/novice03) and [Bearnardd](https://huggingface.co/Bearnardd).
The original code can be found [here](https://github.com/snap-research/EfficientFormer).
The original code can be found [here](https://github.com/snap-research/EfficientFormer). The TensorFlow version of this model was added by [D-Roberts](https://huggingface.co/D-Roberts).

## Documentation resources

@@ -66,3 +66,18 @@ The original code can be found [here](https://github.com/snap-research/Efficient

[[autodoc]] EfficientFormerForImageClassificationWithTeacher
- forward

## TFEfficientFormerModel

[[autodoc]] TFEfficientFormerModel
- call

## TFEfficientFormerForImageClassification

[[autodoc]] TFEfficientFormerForImageClassification
- call

## TFEfficientFormerForImageClassificationWithTeacher

[[autodoc]] TFEfficientFormerForImageClassificationWithTeacher
- call
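A minimal usage sketch for the new TF classes (a hedged example, assuming TensorFlow is installed; the checkpoint name and the existing `EfficientFormerImageProcessor` class are taken from the docstrings elsewhere in this commit):

```python
import requests
import tensorflow as tf
from PIL import Image

from transformers import EfficientFormerImageProcessor, TFEfficientFormerForImageClassification

# A test image (the COCO cats image commonly used in transformers docs).
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

processor = EfficientFormerImageProcessor.from_pretrained("snap-research/efficientformer-l1-300")
model = TFEfficientFormerForImageClassification.from_pretrained("snap-research/efficientformer-l1-300")

# The processor returns TF tensors; the TF model is invoked via call(), not forward().
inputs = processor(images=image, return_tensors="tf")
logits = model(**inputs).logits
predicted_class_idx = int(tf.math.argmax(logits, axis=-1)[0])
print(model.config.id2label[predicted_class_idx])
```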
16 changes: 16 additions & 0 deletions src/transformers/__init__.py
@@ -3129,6 +3129,15 @@
"TFDPRReader",
]
)
_import_structure["models.efficientformer"].extend(
[
"TF_EFFICIENTFORMER_PRETRAINED_MODEL_ARCHIVE_LIST",
"TFEfficientFormerForImageClassification",
"TFEfficientFormerForImageClassificationWithTeacher",
"TFEfficientFormerModel",
"TFEfficientFormerPreTrainedModel",
]
)
_import_structure["models.electra"].extend(
[
"TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -6431,6 +6440,13 @@
            TFDPRQuestionEncoder,
            TFDPRReader,
        )
        from .models.efficientformer import (
            TF_EFFICIENTFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
            TFEfficientFormerForImageClassification,
            TFEfficientFormerForImageClassificationWithTeacher,
            TFEfficientFormerModel,
            TFEfficientFormerPreTrainedModel,
        )
        from .models.electra import (
            TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST,
            TFElectraForMaskedLM,
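With these `_import_structure` entries and the mirrored `TYPE_CHECKING` imports, the new classes become reachable from the top-level package; a quick sketch of the effect (TensorFlow required):

```python
from transformers import TFEfficientFormerModel
from transformers.utils import is_tf_available

# _LazyModule defers loading modeling_tf_efficientformer until the name is
# first touched, so merely importing transformers stays cheap.
assert is_tf_available()
print(TFEfficientFormerModel.__module__)
# transformers.models.efficientformer.modeling_tf_efficientformer
```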
5 changes: 5 additions & 0 deletions src/transformers/models/auto/modeling_tf_auto.py
@@ -47,6 +47,7 @@
("deit", "TFDeiTModel"),
("distilbert", "TFDistilBertModel"),
("dpr", "TFDPRQuestionEncoder"),
("efficientformer", "TFEfficientFormerModel"),
("electra", "TFElectraModel"),
("esm", "TFEsmModel"),
("flaubert", "TFFlaubertModel"),
@@ -201,6 +202,10 @@
("cvt", "TFCvtForImageClassification"),
("data2vec-vision", "TFData2VecVisionForImageClassification"),
("deit", ("TFDeiTForImageClassification", "TFDeiTForImageClassificationWithTeacher")),
(
"efficientformer",
("TFEfficientFormerForImageClassification", "TFEfficientFormerForImageClassificationWithTeacher"),
),
("mobilevit", "TFMobileViTForImageClassification"),
("regnet", "TFRegNetForImageClassification"),
("resnet", "TFResNetForImageClassification"),
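These mapping entries are what let the TF auto classes dispatch to the new model; a hedged sketch (assumes the checkpoint hosts TF-compatible weights):

```python
from transformers import TFAutoModel, TFAutoModelForImageClassification

# "efficientformer" now resolves to TFEfficientFormerModel in the base mapping.
backbone = TFAutoModel.from_pretrained("snap-research/efficientformer-l1-300")

# The classification mapping stores a (plain, with-teacher) tuple; the auto
# class picks the variant matching the checkpoint's configured architecture.
classifier = TFAutoModelForImageClassification.from_pretrained("snap-research/efficientformer-l1-300")
print(type(backbone).__name__, type(classifier).__name__)
```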
35 changes: 34 additions & 1 deletion src/transformers/models/efficientformer/__init__.py
@@ -13,7 +13,13 @@
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available
from ...utils import (
    OptionalDependencyNotAvailable,
    _LazyModule,
    is_tf_available,
    is_torch_available,
    is_vision_available,
)


_import_structure = {
@@ -45,6 +51,20 @@
"EfficientFormerPreTrainedModel",
]

try:
if not is_tf_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_tf_efficientformer"] = [
"TF_EFFICIENTFORMER_PRETRAINED_MODEL_ARCHIVE_LIST",
"TFEfficientFormerForImageClassification",
"TFEfficientFormerForImageClassificationWithTeacher",
"TFEfficientFormerModel",
"TFEfficientFormerPreTrainedModel",
]

if TYPE_CHECKING:
    from .configuration_efficientformer import EFFICIENTFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, EfficientFormerConfig

@@ -69,6 +89,19 @@
        EfficientFormerModel,
        EfficientFormerPreTrainedModel,
    )
    try:
        if not is_tf_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_tf_efficientformer import (
            TF_EFFICIENTFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
            TFEfficientFormerForImageClassification,
            TFEfficientFormerForImageClassificationWithTeacher,
            TFEfficientFormerModel,
            TFEfficientFormerPreTrainedModel,
        )

else:
    import sys
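The guard added above is the standard transformers pattern for optional backends; schematically (a simplified sketch, not the module's exact wiring):

```python
from transformers.utils import OptionalDependencyNotAvailable, is_tf_available

_import_structure = {"configuration_efficientformer": ["EfficientFormerConfig"]}

try:
    if not is_tf_available():  # no TensorFlow -> skip registering TF symbols
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass  # dummy objects elsewhere raise an informative error on first use
else:
    _import_structure["modeling_tf_efficientformer"] = ["TFEfficientFormerModel"]
```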
src/transformers/models/efficientformer/configuration_efficientformer.py
@@ -52,7 +52,7 @@ class EfficientFormerConfig(PretrainedConfig):
            The size of the key in meta3D block.
        attention_ratio (`int`, *optional*, defaults to 4):
            Ratio of the dimension of the query and value to the dimension of the key in MSHA block
        resolution (`int`, *optional*, defaults to 5)
        resolution (`int`, *optional*, defaults to 7)
            Size of each patch
        num_hidden_layers (`int`, *optional*, defaults to 5):
            Number of hidden layers in the Transformer encoder.
@@ -91,6 +91,8 @@ class EfficientFormerConfig(PretrainedConfig):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        image_size (`int`, *optional*, defaults to `224`):
            The size (resolution) of each image.

    Example:
@@ -136,6 +138,7 @@ def __init__(
        hidden_act: str = "gelu",
        initializer_range: float = 0.02,
        layer_norm_eps: float = 1e-12,
        image_size: int = 224,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
@@ -165,3 +168,4 @@ def __init__(
        self.distillation = distillation
        self.use_layer_scale = use_layer_scale
        self.layer_scale_init_value = layer_scale_init_value
        self.image_size = image_size
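A quick sketch of the new `image_size` kwarg in use (224 is the documented default):

```python
from transformers import EfficientFormerConfig

config = EfficientFormerConfig()               # image_size defaults to 224
small = EfficientFormerConfig(image_size=192)  # override for smaller inputs
print(config.image_size, small.image_size)     # 224 192
```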
src/transformers/models/efficientformer/modeling_efficientformer.py
@@ -43,7 +43,7 @@

# Base docstring
_CHECKPOINT_FOR_DOC = "snap-research/efficientformer-l1-300"
_EXPECTED_OUTPUT_SHAPE = [1, 197, 768]
_EXPECTED_OUTPUT_SHAPE = [1, 49, 448]

# Image classification docstring
_IMAGE_CLASS_CHECKPOINT = "snap-research/efficientformer-l1-300"
@@ -224,7 +224,7 @@ def __init__(
        hidden_features = hidden_features or in_features

        self.convolution1 = nn.Conv2d(in_features, hidden_features, 1)
        self.actvation = ACT2FN[config.hidden_act]
        self.activation = ACT2FN[config.hidden_act]
        self.convolution2 = nn.Conv2d(hidden_features, out_features, 1)
        self.dropout = nn.Dropout(drop)

@@ -235,13 +235,13 @@ def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        hidden_state = self.convolution1(hidden_state)
        hidden_state = self.batchnorm_before(hidden_state)

        hidden_state = self.actvation(hidden_state)
        hidden_state = self.activation(hidden_state)
        hidden_state = self.dropout(hidden_state)
        hidden_state = self.convolution2(hidden_state)

        hidden_state = self.batchnorm_after(hidden_state)

        hidden_state = self.dropout(hidden_state)

        return hidden_state


@@ -266,7 +266,7 @@ def drop_path(input, drop_prob: float = 0.0, training: bool = False):
    return output


# Copied from transformers.models.beit.modeling_beit.BeitDropPath with Beit->Bit
# Copied from transformers.models.beit.modeling_beit.BeitDropPath with Beit->EfficientFormer
class EfficientFormerDropPath(nn.Module):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

@@ -349,6 +349,7 @@ def forward(self, hidden_states: torch.Tensor, output_attentions: bool = False)
        for layer_module in self.blocks:
            if isinstance(hidden_states, tuple):
                hidden_states = hidden_states[0]

            hidden_states = layer_module(hidden_states, output_attentions)
            if output_attentions:
                all_attention_outputs = all_attention_outputs + (hidden_states[1],)
@@ -379,6 +380,7 @@ def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor]:

        if self.use_layer_scale:
            layer_output = hidden_states + self.drop_path(self.layer_scale_1.unsqueeze(-1).unsqueeze(-1) * outputs)

            layer_output = layer_output + self.drop_path(
                self.layer_scale_2.unsqueeze(-1).unsqueeze(-1) * self.mlp(layer_output)
            )
@@ -398,6 +400,7 @@ def __init__(self, config: EfficientFormerConfig, stage_idx: int):
        drop_paths = [
            config.drop_path_rate * (block_idx + sum(config.depths[:stage_idx])) for block_idx in range(num_layers)
        ]

        self.blocks = nn.ModuleList(
            [
                EfficientFormerMeta4D(config, config.hidden_sizes[stage_idx], drop_path=drop_path)
@@ -446,6 +449,7 @@ def __init__(self, config: EfficientFormerConfig):
            for i in range(num_intermediate_stages)
        ]
        intermediate_stages = []

        for i in range(num_intermediate_stages):
            intermediate_stages.append(EfficientFormerIntermediateStage(config, i))
            if downsamples[i]:
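For context, the `drop_path` function touched above implements standard stochastic depth; a self-contained sketch of that formulation, mirroring the signature shown in the hunk header (not necessarily the file's exact body):

```python
import torch

def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """Zero out whole samples on the residual branch and rescale the survivors."""
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1.0 - drop_prob
    # One Bernoulli draw per sample, broadcast across the remaining dimensions.
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize: keep (1) or drop (0)
    output = input.div(keep_prob) * random_tensor
    return output
```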
(Diff for the remaining files, including the new src/transformers/models/efficientformer/modeling_tf_efficientformer.py, not shown.)