1 change: 0 additions & 1 deletion conftest.py
@@ -54,7 +54,6 @@
"test_gradient_checkpointing_backward_compatibility",
"test_gradient_checkpointing_enable_disable",
"test_torch_save_load",
"test_initialization",
"test_forward_signature",
"test_model_get_set_embeddings",
"test_model_main_input_name",
25 changes: 0 additions & 25 deletions tests/models/aimv2/test_modeling_aimv2.py
@@ -39,7 +39,6 @@
 from ...test_modeling_common import (
     TEST_EAGER_MATCHES_SDPA_INFERENCE_PARAMETERIZATION,
     ModelTesterMixin,
-    _config_zero_init,
     _test_eager_matches_sdpa_inference,
     floats_tensor,
     ids_tensor,
@@ -427,30 +426,6 @@ def test_model_get_set_embeddings(self):
     def test_multi_gpu_data_parallel_forward(self):
         pass
 
-    # Override as the `logit_scale` parameter initialization is different for Aimv2
-    def test_initialization(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        configs_no_init = _config_zero_init(config)
-        for model_class in self.all_model_classes:
-            model = model_class(config=configs_no_init)
-            for name, param in model.named_parameters():
-                if param.requires_grad:
-                    # check if `logit_scale` is initialized as per the original implementation
-                    if name == "logit_scale":
-                        self.assertAlmostEqual(
-                            param.data.item(),
-                            np.log(1 / 0.07),
-                            delta=1e-3,
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-                    else:
-                        self.assertIn(
-                            ((param.data.mean() * 1e9).round() / 1e9).item(),
-                            [0.0, 1.0],
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-
     def test_load_vision_text_config(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

31 changes: 1 addition & 30 deletions tests/models/align/test_modeling_align.py
@@ -466,7 +466,7 @@ def test_batching_equivalence(self, atol=3e-4, rtol=3e-4):

@unittest.skip(reason="Start to fail after using torch `cu118`.")
def test_multi_gpu_data_parallel_forward(self):
super().test_multi_gpu_data_parallel_forward()
pass

@unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self):
@@ -488,35 +488,6 @@ def test_retain_grad_hidden_states_attentions(self):
     def test_model_get_set_embeddings(self):
         pass
 
-    # override as the `temperature` parameter initialization is different for ALIGN
-    def test_initialization(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        configs_no_init = _config_zero_init(config)
-        for model_class in self.all_model_classes:
-            model = model_class(config=configs_no_init)
-            for name, param in model.named_parameters():
-                if param.requires_grad:
-                    # check if `temperature` is initialized as per the original implementation
-                    if name == "temperature":
-                        self.assertAlmostEqual(
-                            param.data.item(),
-                            1.0,
-                            delta=1e-3,
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-                    elif name == "text_projection.weight":
-                        self.assertTrue(
-                            -1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-                    else:
-                        self.assertIn(
-                            ((param.data.mean() * 1e9).round() / 1e9).item(),
-                            [0.0, 1.0],
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
             self.skipTest(reason="test_torchscript is set to False")
23 changes: 0 additions & 23 deletions tests/models/altclip/test_modeling_altclip.py
@@ -464,29 +464,6 @@ def test_retain_grad_hidden_states_attentions(self):
     def test_model_get_set_embeddings(self):
         pass
 
-    # override as the `logit_scale` parameter initialization is different for AltCLIP
-    def test_initialization(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        configs_no_init = _config_zero_init(config)
-        for model_class in self.all_model_classes:
-            model = model_class(config=configs_no_init)
-            for name, param in model.named_parameters():
-                if param.requires_grad:
-                    # check if `logit_scale` is initialized as per the original implementation
-                    if name == "logit_scale":
-                        self.assertAlmostEqual(
-                            param.data.item(),
-                            np.log(1 / 0.07),
-                            delta=1e-3,
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-                    else:
-                        self.assertIn(
-                            ((param.data.mean() * 1e9).round() / 1e9).item(),
-                            [0.0, 1.0],
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
             self.skipTest(reason="test_torchscript is set to False")
4 changes: 0 additions & 4 deletions tests/models/aria/test_modeling_aria.py
@@ -198,10 +198,6 @@ def setUp(self):
         self.model_tester = AriaVisionText2TextModelTester(self)
         self.config_tester = ConfigTester(self, config_class=AriaConfig, has_text_modality=False)
 
-    @unittest.skip(reason="Unstable test")
-    def test_initialization(self):
-        pass
-
 
 SKIP = False
 torch_accelerator_module = getattr(torch, torch_device)
4 changes: 0 additions & 4 deletions tests/models/aya_vision/test_modeling_aya_vision.py
@@ -197,10 +197,6 @@ def test_training_gradient_checkpointing_use_reentrant(self):
     def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass
 
-    @unittest.skip(reason="Siglip uses a non-standard initialization scheme")
-    def test_initialization(self):
-        pass
-
     @unittest.skip(reason="Compile not yet supported because in LLava models")
     @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
26 changes: 1 addition & 25 deletions tests/models/bamba/test_modeling_bamba.py
@@ -41,7 +41,7 @@

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
-from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_tensor
+from ...test_modeling_common import ModelTesterMixin, ids_tensor
 from ...test_pipeline_mixin import PipelineTesterMixin


@@ -335,30 +335,6 @@ def test_decoder_model_past_with_large_inputs(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
 
-    def test_initialization(self):
-        r"""
-        Overriding the test_initialization test as the A_log and D params of the Bamba mixer are initialized differently
-        """
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        configs_no_init = _config_zero_init(config)
-        for model_class in self.all_model_classes:
-            model = model_class(config=configs_no_init)
-            for name, param in model.named_parameters():
-                if param.requires_grad:
-                    if "A_log" in name:
-                        A = torch.arange(1, config.mamba_n_heads + 1, dtype=torch.float32)
-                        torch.testing.assert_close(param.data, torch.log(A), rtol=1e-5, atol=1e-5)
-                    elif "D" in name:
-                        D = torch.ones(config.mamba_n_heads, dtype=torch.float32)
-                        torch.testing.assert_close(param.data, D, rtol=1e-5, atol=1e-5)
-                    else:
-                        self.assertIn(
-                            ((param.data.mean() * 1e9).round() / 1e9).item(),
-                            [0.0, 1.0],
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-
     def test_attention_outputs(self):
         r"""
         Overriding the test_attention_outputs test as the Bamba model outputs attention only for its attention layers
20 changes: 1 addition & 19 deletions tests/models/beit/test_modeling_beit.py
@@ -34,7 +34,7 @@

 from ...test_backbone_common import BackboneTesterMixin
 from ...test_configuration_common import ConfigTester
-from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
+from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
 from ...test_pipeline_mixin import PipelineTesterMixin


@@ -382,24 +382,6 @@ def test_training_gradient_checkpointing_use_reentrant(self):
     def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass
 
-    def test_initialization(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        configs_no_init = _config_zero_init(config)
-        for model_class in self.all_model_classes:
-            model = model_class(config=configs_no_init)
-            for name, param in model.named_parameters():
-                # we skip lambda parameters as these require special initial values
-                # determined by config.layer_scale_init_value
-                if "lambda" in name:
-                    continue
-                if param.requires_grad:
-                    self.assertIn(
-                        ((param.data.mean() * 1e9).round() / 1e9).item(),
-                        [0.0, 1.0],
-                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                    )
-
     @slow
     def test_model_from_pretrained(self):
         model_name = "microsoft/beit-base-patch16-224"
17 changes: 0 additions & 17 deletions tests/models/bit/test_modeling_bit.py
@@ -28,7 +28,6 @@

 if is_torch_available():
     import torch
-    from torch import nn
 
     from transformers import BitBackbone, BitForImageClassification, BitImageProcessor, BitModel

@@ -200,22 +199,6 @@ def test_backbone(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_backbone(*config_and_inputs)
 
-    def test_initialization(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            model = model_class(config=config)
-            for name, module in model.named_modules():
-                if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
-                    self.assertTrue(
-                        torch.all(module.weight == 1),
-                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                    )
-                    self.assertTrue(
-                        torch.all(module.bias == 0),
-                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                    )
-
     def test_hidden_states_output(self):
         def check_hidden_states_output(inputs_dict, config, model_class):
             model = model_class(config)
83 changes: 0 additions & 83 deletions tests/models/blip/test_modeling_blip.py
@@ -456,41 +456,6 @@ def test_retain_grad_hidden_states_attentions(self):
     def test_model_get_set_embeddings(self):
         pass
 
-    # override as the `logit_scale` parameter initialization is different for Blip
-    def test_initialization(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        configs_no_init = _config_zero_init(config)
-        for model_class in self.all_model_classes:
-            model = model_class(config=configs_no_init)
-            for name, param in model.named_parameters():
-                if param.requires_grad:
-                    # check if `logit_scale` is initialized as per the original implementation
-                    if name == "logit_scale":
-                        self.assertAlmostEqual(
-                            param.data.item(),
-                            np.log(1 / 0.07),
-                            delta=1e-3,
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-                    else:
-                        # See PR #38607 (to avoid flakiness)
-                        data = torch.flatten(param.data)
-                        n_elements = torch.numel(data)
-                        # skip 2.5% of elements on each side to avoid issues caused by `nn.init.trunc_normal_` described in
-                        # https://github.com/huggingface/transformers/pull/27906#issuecomment-1846951332
-                        n_elements_to_skip_on_each_side = int(n_elements * 0.025)
-                        data_to_check = torch.sort(data).values
-                        if n_elements_to_skip_on_each_side > 0:
-                            data_to_check = data_to_check[
-                                n_elements_to_skip_on_each_side:-n_elements_to_skip_on_each_side
-                            ]
-                        self.assertIn(
-                            ((data_to_check.mean() * 1e9).round() / 1e9).item(),
-                            [0.0, 1.0],
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
             self.skipTest(reason="test_torchscript is set to False")
@@ -981,30 +946,6 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
     def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass
 
-    # override as the `logit_scale` parameter initialization is different for Blip
-    def test_initialization(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        configs_no_init = _config_zero_init(config)
-        for model_class in self.all_model_classes:
-            model = model_class(config=configs_no_init)
-            for name, param in model.named_parameters():
-                if param.requires_grad:
-                    # check if `logit_scale` is initialized as per the original implementation
-                    if name == "logit_scale":
-                        self.assertAlmostEqual(
-                            param.data.item(),
-                            np.log(1 / 0.07),
-                            delta=1e-3,
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-                    else:
-                        self.assertIn(
-                            ((param.data.mean() * 1e9).round() / 1e9).item(),
-                            [0.0, 1.0],
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
             self.skipTest(reason="test_torchscript is set to False")
@@ -1194,30 +1135,6 @@ def test_training_gradient_checkpointing(self):
             loss = model(**inputs).loss
             loss.backward()
 
-    # override as the `logit_scale` parameter initialization is different for Blip
-    def test_initialization(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        configs_no_init = _config_zero_init(config)
-        for model_class in self.all_model_classes:
-            model = model_class(config=configs_no_init)
-            for name, param in model.named_parameters():
-                if param.requires_grad:
-                    # check if `logit_scale` is initialized as per the original implementation
-                    if name == "logit_scale":
-                        self.assertAlmostEqual(
-                            param.data.item(),
-                            np.log(1 / 0.07),
-                            delta=1e-3,
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-                    else:
-                        self.assertIn(
-                            ((param.data.mean() * 1e9).round() / 1e9).item(),
-                            [0.0, 1.0],
-                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
-                        )
-
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
             self.skipTest(reason="test_torchscript is set to False")