9 changes: 5 additions & 4 deletions src/transformers/commands/add_new_model_like.py
@@ -659,7 +659,7 @@ def get_model_files(model_type: str, frameworks: Optional[list[str]] = None) ->
return {"doc_file": doc_file, "model_files": model_files, "module_name": module_name, "test_files": test_files}


_re_checkpoint_for_doc = re.compile(r"^_CHECKPOINT_FOR_DOC\s+=\s+(\S*)\s*$", flags=re.MULTILINE)
_re_checkpoint_in_config = re.compile(r"\[(.+?)\]\((https://huggingface\.co/.+?)\)")


def find_base_model_checkpoint(
@@ -680,13 +680,14 @@ def find_base_model_checkpoint(
model_files = get_model_files(model_type)
module_files = model_files["model_files"]
for fname in module_files:
if "modeling" not in str(fname):
# After the @auto_docstring refactor, we expect the checkpoint to be in the configuration file's docstring
if "configuration" not in str(fname):
continue

with open(fname, "r", encoding="utf-8") as f:
content = f.read()
if _re_checkpoint_for_doc.search(content) is not None:
checkpoint = _re_checkpoint_for_doc.search(content).groups()[0]
if _re_checkpoint_in_config.search(content) is not None:
checkpoint = _re_checkpoint_in_config.search(content).groups()[0]
# Remove quotes
checkpoint = checkpoint.replace('"', "")
checkpoint = checkpoint.replace("'", "")
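For illustration, here is a minimal sketch of how the new `_re_checkpoint_in_config` pattern picks the checkpoint out of a configuration docstring. The docstring text below is made up for the example, not taken from an actual model file:

```python
import re

# Same pattern as in the diff: a Markdown link pointing at a huggingface.co repo.
_re_checkpoint_in_config = re.compile(r"\[(.+?)\]\((https://huggingface\.co/.+?)\)")

# Illustrative docstring snippet; real configuration docstrings vary per model.
docstring = (
    "Instantiating a configuration with the defaults will yield a similar configuration "
    "to that of the [google-bert/bert-base-uncased]"
    "(https://huggingface.co/google-bert/bert-base-uncased) architecture."
)

match = _re_checkpoint_in_config.search(docstring)
if match is not None:
    # groups()[0] is the checkpoint name, groups()[1] the hub URL
    checkpoint, url = match.groups()
    print(checkpoint)  # google-bert/bert-base-uncased
    print(url)         # https://huggingface.co/google-bert/bert-base-uncased
```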
15 changes: 15 additions & 0 deletions src/transformers/testing_utils.py
@@ -495,6 +495,10 @@ def require_jinja(test_case):


def require_tf2onnx(test_case):
logger.warning_once(
"TensorFlow test-related code, including `require_tf2onnx`, is deprecated and will be removed in "
"Transformers v4.55"
)
return unittest.skipUnless(is_tf2onnx_available(), "test requires tf2onnx")(test_case)


@@ -689,6 +693,10 @@ def require_tensorflow_probability(test_case):
These tests are skipped when TensorFlow probability isn't installed.

"""
logger.warning_once(
"TensorFlow test-related code, including `require_tensorflow_probability`, is deprecated and will be "
"removed in Transformers v4.55"
)
return unittest.skipUnless(is_tensorflow_probability_available(), "test requires TensorFlow probability")(
test_case
)
@@ -715,6 +723,9 @@ def require_flax(test_case):
"""
Decorator marking a test that requires JAX & Flax. These tests are skipped when one / both are not installed
"""
logger.warning_once(
"JAX test-related code, including `require_flax`, is deprecated and will be removed in Transformers v4.55"
)
return unittest.skipUnless(is_flax_available(), "test requires JAX & Flax")(test_case)


@@ -758,6 +769,10 @@ def require_tensorflow_text(test_case):
Decorator marking a test that requires tensorflow_text. These tests are skipped when tensorflow_text isn't
installed.
"""
logger.warning_once(
"TensorFlow test-related code, including `require_tensorflow_text`, is deprecated and will be "
"removed in Transformers v4.55"
)
return unittest.skipUnless(is_tensorflow_text_available(), "test requires tensorflow_text")(test_case)


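As a minimal sketch (the test class and body below are hypothetical), this is how one of these decorators is typically used. With this change, simply applying the decorator also emits the one-time deprecation warning at decoration time, in addition to skipping the test when the dependency is missing:

```python
import unittest

from transformers.testing_utils import require_flax


class ExampleFlaxTests(unittest.TestCase):
    # Applying the decorator triggers the warning once when the module is imported;
    # the test itself is still skipped if JAX/Flax are not installed.
    @require_flax
    def test_toy_flax_array(self):
        import jax.numpy as jnp

        self.assertEqual(int(jnp.asarray([1, 2, 3]).sum()), 6)
```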
2 changes: 1 addition & 1 deletion tests/fixtures/add_distilbert_like_config.json
@@ -16,4 +16,4 @@
"tf",
"flax"
]
}
}
36 changes: 0 additions & 36 deletions tests/models/tapas/test_tokenization_tapas.py
@@ -33,7 +33,6 @@
)
from transformers.testing_utils import (
require_pandas,
require_tensorflow_probability,
require_tokenizers,
require_torch,
slow,
@@ -140,41 +139,6 @@ def get_input_output_texts(self, tokenizer):
output_text = "unwanted, running"
return input_text, output_text

@require_tensorflow_probability
@slow
def test_tf_encode_plus_sent_to_model(self):
from transformers import TF_MODEL_MAPPING, TOKENIZER_MAPPING

MODEL_TOKENIZER_MAPPING = merge_model_tokenizer_mappings(TF_MODEL_MAPPING, TOKENIZER_MAPPING)

tokenizers = self.get_tokenizers(do_lower_case=False)
for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")

config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
config = config_class()

if config.is_encoder_decoder or config.pad_token_id is None:
self.skipTest(reason="Model is an encoder-decoder or does not have a pad token id set")

model = model_class(config)

# Make sure the model contains at least the full vocabulary size in its embedding matrix
self.assertGreaterEqual(model.config.vocab_size, len(tokenizer))

# Build sequence
first_ten_tokens = list(tokenizer.get_vocab().keys())[:10]
sequence = " ".join(first_ten_tokens)
table = self.get_table(tokenizer, length=0)
encoded_sequence = tokenizer.encode_plus(table, sequence, return_tensors="tf")
batch_encoded_sequence = tokenizer.batch_encode_plus(table, [sequence, sequence], return_tensors="tf")

# This should not fail
model(encoded_sequence)
model(batch_encoded_sequence)

def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer:
self.skipTest(reason="test_rust_tokenizer is set to False")
@@ -161,10 +161,6 @@ def check_vision_text_output_attention(
(text_config.num_attention_heads, input_ids.shape[-1], input_ids.shape[-1]),
)

def assert_almost_equals(self, a: np.ndarray, b: np.ndarray, tol: float):
diff = np.abs(a - b).max()
self.assertLessEqual(diff, tol, f"Difference between torch and flax is {diff} (>= {tol}).")

def test_vision_text_dual_encoder_model(self):
inputs_dict = self.prepare_config_and_inputs()
self.check_vision_text_dual_encoder_model(**inputs_dict)
6 changes: 0 additions & 6 deletions tests/models/wav2vec2/test_modeling_wav2vec2.py
@@ -813,12 +813,6 @@ def flatten_output(output):
# (Even with this call, there is still a memory leak of ~0.04MB)
self.clear_torch_jit_class_registry()

@unittest.skip(
"Need to investigate why config.do_stable_layer_norm is set to False here when it doesn't seem to be supported"
)
def test_flax_from_pt_safetensors(self):
return


@require_torch
class Wav2Vec2RobustModelTest(ModelTesterMixin, unittest.TestCase):
11 changes: 1 addition & 10 deletions tests/models/whisper/test_tokenization_whisper.py
@@ -18,7 +18,7 @@

from transformers.models.whisper import WhisperTokenizer, WhisperTokenizerFast
from transformers.models.whisper.tokenization_whisper import _combine_tokens_into_words, _find_longest_common_sequence
from transformers.testing_utils import require_flax, require_torch, slow
from transformers.testing_utils import require_torch, slow

from ...test_tokenization_common import TokenizerTesterMixin

@@ -588,15 +588,6 @@ def test_convert_to_list_np(self):
self.assertListEqual(WhisperTokenizer._convert_to_list(np_array), test_list)
self.assertListEqual(WhisperTokenizerFast._convert_to_list(np_array), test_list)

@require_flax
def test_convert_to_list_jax(self):
import jax.numpy as jnp

test_list = [[1, 2, 3], [4, 5, 6]]
jax_array = jnp.array(test_list)
self.assertListEqual(WhisperTokenizer._convert_to_list(jax_array), test_list)
self.assertListEqual(WhisperTokenizerFast._convert_to_list(jax_array), test_list)

@require_torch
def test_convert_to_list_pt(self):
import torch
80 changes: 0 additions & 80 deletions tests/pipelines/test_pipelines_table_question_answering.py
@@ -19,13 +19,10 @@
AutoModelForTableQuestionAnswering,
AutoTokenizer,
TableQuestionAnsweringPipeline,
TFAutoModelForTableQuestionAnswering,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
require_pandas,
require_tensorflow_probability,
require_torch,
slow,
)
@@ -316,55 +313,6 @@ def test_integration_wtq_pt(self, torch_dtype="float32"):
def test_integration_wtq_pt_fp16(self):
self.test_integration_wtq_pt(torch_dtype="float16")

@slow
@require_tensorflow_probability
@require_pandas
def test_integration_wtq_tf(self):
model_id = "google/tapas-base-finetuned-wtq"
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
table_querier = pipeline("table-question-answering", model=model, tokenizer=tokenizer)

data = {
"Repository": ["Transformers", "Datasets", "Tokenizers"],
"Stars": ["36542", "4512", "3934"],
"Contributors": ["651", "77", "34"],
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
}
queries = [
"What repository has the largest number of stars?",
"Given that the numbers of stars defines if a repository is active, what repository is the most active?",
"What is the number of repositories?",
"What is the average number of stars?",
"What is the total amount of stars?",
]

results = table_querier(data, queries)

expected_results = [
{"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
{"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
{
"answer": "COUNT > Transformers, Datasets, Tokenizers",
"coordinates": [(0, 0), (1, 0), (2, 0)],
"cells": ["Transformers", "Datasets", "Tokenizers"],
"aggregator": "COUNT",
},
{
"answer": "AVERAGE > 36542, 4512, 3934",
"coordinates": [(0, 1), (1, 1), (2, 1)],
"cells": ["36542", "4512", "3934"],
"aggregator": "AVERAGE",
},
{
"answer": "SUM > 36542, 4512, 3934",
"coordinates": [(0, 1), (1, 1), (2, 1)],
"cells": ["36542", "4512", "3934"],
"aggregator": "SUM",
},
]
self.assertListEqual(results, expected_results)

@slow
@require_torch
def test_integration_sqa_pt(self, torch_dtype="float32"):
@@ -395,34 +343,6 @@ def test_integration_sqa_pt(self, torch_dtype="float32"):
def test_integration_sqa_pt_fp16(self):
self.test_integration_sqa_pt(torch_dtype="float16")

@slow
@require_tensorflow_probability
@require_pandas
def test_integration_sqa_tf(self):
model_id = "google/tapas-base-finetuned-sqa"
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
table_querier = pipeline(
"table-question-answering",
model=model,
tokenizer=tokenizer,
)
data = {
"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
"Age": ["56", "45", "59"],
"Number of movies": ["87", "53", "69"],
"Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
}
queries = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"]
results = table_querier(data, queries, sequential=True)

expected_results = [
{"answer": "69", "coordinates": [(2, 2)], "cells": ["69"]},
{"answer": "59", "coordinates": [(2, 1)], "cells": ["59"]},
{"answer": "28 november 1967", "coordinates": [(2, 3)], "cells": ["28 november 1967"]},
]
self.assertListEqual(results, expected_results)

@slow
@require_torch
def test_large_model_pt_tapex(self, torch_dtype="float32"):
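For reference, the PyTorch integration path that remains covered (e.g. `test_integration_wtq_pt`) exercises the same table and queries roughly as follows. This is a sketch, not the exact test body; running it requires `torch`, `pandas`, and downloading the checkpoint:

```python
from transformers import AutoModelForTableQuestionAnswering, AutoTokenizer, pipeline

model_id = "google/tapas-base-finetuned-wtq"
model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
table_querier = pipeline("table-question-answering", model=model, tokenizer=tokenizer)

data = {
    "Repository": ["Transformers", "Datasets", "Tokenizers"],
    "Stars": ["36542", "4512", "3934"],
}
queries = [
    "What repository has the largest number of stars?",
    "What is the total amount of stars?",
]

# Each result is a dict with "answer", "coordinates", "cells" and "aggregator" keys.
for result in table_querier(data, queries):
    print(result["answer"])
```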
22 changes: 2 additions & 20 deletions tests/test_image_transforms.py
@@ -17,16 +17,13 @@
import numpy as np
from parameterized import parameterized

from transformers.testing_utils import require_flax, require_torch, require_vision
from transformers.utils.import_utils import is_flax_available, is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_vision
from transformers.utils.import_utils import is_torch_available, is_vision_available


if is_torch_available():
import torch

if is_flax_available():
import jax

if is_vision_available():
import PIL.Image

@@ -133,21 +130,6 @@ def test_to_pil_image_from_torch(self):
self.assertIsInstance(pil_image, PIL.Image.Image)
self.assertEqual(pil_image.size, (5, 4))

@require_flax
def test_to_pil_image_from_jax(self):
key = jax.random.PRNGKey(0)
# channel first
image = jax.random.uniform(key, (3, 4, 5))
pil_image = to_pil_image(image)
self.assertIsInstance(pil_image, PIL.Image.Image)
self.assertEqual(pil_image.size, (5, 4))

# channel last
image = jax.random.uniform(key, (4, 5, 3))
pil_image = to_pil_image(image)
self.assertIsInstance(pil_image, PIL.Image.Image)
self.assertEqual(pil_image.size, (5, 4))

def test_to_channel_dimension_format(self):
# Test that function doesn't reorder if channel dim matches the input.
image = np.random.rand(3, 4, 5)
4 changes: 0 additions & 4 deletions tests/test_modeling_common.py
@@ -2453,10 +2453,6 @@ def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_cla

return new_tf_outputs, new_pt_outputs

def assert_almost_equals(self, a: np.ndarray, b: np.ndarray, tol: float):
diff = np.abs(a - b).max()
self.assertLessEqual(diff, tol, f"Difference between torch and flax is {diff} (>= {tol}).")

def test_inputs_embeds(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

11 changes: 1 addition & 10 deletions tests/test_tokenization_common.py
@@ -43,8 +43,6 @@
SpecialTokensMixin,
Trainer,
TrainingArguments,
is_flax_available,
is_tf_available,
is_torch_available,
logging,
)
@@ -3105,7 +3103,6 @@ def test_torch_encode_plus_sent_to_model(self):
# model(**encoded_sequence_fast)
# model(**batch_encoded_sequence_fast)

# TODO: Check if require_torch is the best to test for numpy here ... Maybe move to require_flax when available
@require_torch
@slow
def test_np_encode_plus_sent_to_model(self):
@@ -3131,7 +3128,6 @@ def test_np_encode_plus_sent_to_model(self):
encoded_sequence = tokenizer.encode_plus(sequence, return_tensors="np")
batch_encoded_sequence = tokenizer.batch_encode_plus([sequence, sequence], return_tensors="np")

# TODO: add forward through JAX/Flax when PR is merged
# This is currently here to make ruff happy !
if encoded_sequence is None:
raise ValueError("Cannot convert list to numpy tensor on encode_plus()")
@@ -3146,7 +3142,6 @@ def test_np_encode_plus_sent_to_model(self):
[sequence, sequence], return_tensors="np"
)

# TODO: add forward through JAX/Flax when PR is merged
# This is currently here to make ruff happy !
if encoded_sequence_fast is None:
raise ValueError("Cannot convert list to numpy tensor on encode_plus() (fast)")
@@ -3617,12 +3612,8 @@ def test_batch_encode_dynamic_overflowing(self):
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name}, {tokenizer.__class__.__name__})"):
if is_torch_available():
returned_tensor = "pt"
elif is_tf_available():
returned_tensor = "tf"
elif is_flax_available():
returned_tensor = "jax"
else:
self.skipTest(reason="No expected framework from PT, TF or JAX found")
self.skipTest(reason="No expected framework (PT) found")

if not tokenizer.pad_token or tokenizer.pad_token_id < 0:
self.skipTest(reason="This tokenizer has no padding token set, or pad_token_id < 0")