From 7917271059966f172dc88c0fa3881fc209a87c65 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 22 May 2023 19:32:24 +0100 Subject: [PATCH 1/5] New TF version compatibility fixes --- src/transformers/modeling_tf_utils.py | 44 +++++++++------- src/transformers/pipelines/base.py | 4 +- src/transformers/tf_utils.py | 75 +++++++++++++++++++++++++++ src/transformers/utils/generic.py | 6 +-- 4 files changed, 105 insertions(+), 24 deletions(-) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 630290d9216..3d22292661f 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -38,7 +38,7 @@ from .configuration_utils import PretrainedConfig from .dynamic_module_utils import custom_object_save from .generation import GenerationConfig, TFGenerationMixin -from .tf_utils import shape_list +from .tf_utils import load_attributes_from_hdf5_group, save_attributes_to_hdf5_group, shape_list from .utils import ( DUMMY_INPUTS, SAFE_WEIGHTS_INDEX_NAME, @@ -65,16 +65,15 @@ from .utils.hub import convert_file_size_to_int, get_checkpoint_shard_files -if parse(tf.__version__) >= parse("2.11.0"): +if parse(tf.__version__).minor >= 13: + from keras import backend as K + from keras.__internal__ import KerasTensor +elif parse(tf.__version__).minor >= 11: from keras import backend as K - from keras.engine import data_adapter from keras.engine.keras_tensor import KerasTensor - from keras.saving.legacy import hdf5_format else: from tensorflow.python.keras import backend as K - from tensorflow.python.keras.engine import data_adapter from tensorflow.python.keras.engine.keras_tensor import KerasTensor - from tensorflow.python.keras.saving import hdf5_format if is_safetensors_available(): @@ -102,6 +101,18 @@ ] +def expand_1d(data): + """Expands 1-dimensional `Tensor`s into 2-dimensional `Tensor`s. + Copied from Keras to here to avoid versioning issues.""" + + def _expand_single_1d_tensor(t): + if isinstance(t, tf.Tensor) and t.shape.rank == 1: + return tf.expand_dims(t, axis=-1) + return t + + return tf.nest.map_structure(_expand_single_1d_tensor, data) + + def dummy_loss(y_true, y_pred): if y_pred.shape.rank <= 1: return y_pred @@ -507,6 +518,7 @@ def input_processing(func, config, **kwargs): if isinstance(v, allowed_types) or v is None: output[k] = v else: + print() raise ValueError(f"Data of type {type(v)} is not allowed only {allowed_types} is accepted for {k}.") if isinstance(main_input, (tuple, list)): @@ -797,9 +809,7 @@ def load_tf_shard(model, model_layer_map, resolved_archive_file, ignore_mismatch try: with h5py.File(resolved_archive_file, "r") as sharded_checkpoint_file: # Retrieve the name of each layer from the H5 file - saved_h5_model_layers_name = set( - hdf5_format.load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names") - ) + saved_h5_model_layers_name = set(load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names")) weight_value_tuples = [] # Compute missing and unexpected sub layers @@ -898,9 +908,7 @@ def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_size # Read the H5 file with h5py.File(resolved_archive_file, "r") as sharded_checkpoint_file: # Retrieve the name of each layer from the H5 file - saved_h5_model_layers_name = set( - hdf5_format.load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names") - ) + saved_h5_model_layers_name = set(load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names")) # Find the missing layers from the high level list of layers missing_layers = list({layer.name for layer in model.layers} - saved_h5_model_layers_name) @@ -924,7 +932,7 @@ def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_size # Create a dict from the H5 saved model that looks like {"weight_name": weight_value} # And a set with only the names - for weight_name in hdf5_format.load_attributes_from_hdf5_group(h5_layer_object, "weight_names"): + for weight_name in load_attributes_from_hdf5_group(h5_layer_object, "weight_names"): # TF names always start with the model name so we ignore it name = "/".join(weight_name.split("/")[1:]) @@ -1528,8 +1536,8 @@ def train_step(self, data): output_to_label = {val: key for key, val in label_to_output.items()} if not self._using_dummy_loss and parse(tf.__version__) < parse("2.11.0"): # Newer TF train steps leave this out - data = data_adapter.expand_1d(data) - x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) + data = expand_1d(data) + x, y, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data) # If the inputs are mutable dictionaries, make a shallow copy of them because we will modify # them during input/label pre-processing. This avoids surprising the user by wrecking their data. # In addition, modifying mutable Python inputs makes XLA compilation impossible. @@ -1635,8 +1643,8 @@ def test_step(self, data): output_to_label = {val: key for key, val in label_to_output.items()} if not self._using_dummy_loss and parse(tf.__version__) < parse("2.11.0"): # Newer versions leave this out - data = data_adapter.expand_1d(data) - x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) + data = expand_1d(data) + x, y, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data) # If the inputs are mutable dictionaries, make a shallow copy of them because we will modify # them during input/label pre-processing. This avoids surprising the user by wrecking their data. # In addition, modifying mutable Python inputs makes XLA compilation impossible. @@ -2402,7 +2410,7 @@ def save_pretrained( ) param_dset[:] = layer.numpy() layers.append(layer_name.encode("utf8")) - hdf5_format.save_attributes_to_hdf5_group(shard_file, "layer_names", layers) + save_attributes_to_hdf5_group(shard_file, "layer_names", layers) if push_to_hub: self._upload_modified_files( diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index de6c9a8ec4d..893822b157f 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -278,7 +278,7 @@ def infer_framework_load_model( if isinstance(model, str): raise ValueError(f"Could not load model {model} with any of the following classes: {class_tuple}.") - framework = "tf" if "keras.engine.training.Model" in str(inspect.getmro(model.__class__)) else "pt" + framework = "tf" if "TFPreTrainedModel" in str(inspect.getmro(model.__class__)) else "pt" return framework, model @@ -351,7 +351,7 @@ def get_framework(model, revision: Optional[str] = None): except OSError: model = TFAutoModel.from_pretrained(model, revision=revision) - framework = "tf" if "keras.engine.training.Model" in str(inspect.getmro(model.__class__)) else "pt" + framework = "tf" if "TFPreTrainedModel" in str(inspect.getmro(model.__class__)) else "pt" return framework diff --git a/src/transformers/tf_utils.py b/src/transformers/tf_utils.py index 306f73c0b1b..f383e38dcc9 100644 --- a/src/transformers/tf_utils.py +++ b/src/transformers/tf_utils.py @@ -166,3 +166,78 @@ def check_embeddings_within_bounds(tensor: tf.Tensor, embed_dim: int, tensor_nam f"layer's input dimension ({embed_dim}). The likely cause is some problem at tokenization time." ), ) + + +def save_attributes_to_hdf5_group(group, name, data): + """Saves attributes (data) of the specified name into the HDF5 group. + + This method deals with an inherent problem of HDF5 file which is not able to store data larger than + HDF5_OBJECT_HEADER_LIMIT bytes. + + Args: + group: A pointer to a HDF5 group. + name: A name of the attributes to save. + data: Attributes data to store. + + Raises: + RuntimeError: If any single attribute is too large to be saved. + + Copied from Keras to Transformers to avoid versioning issues. + """ + HDF5_OBJECT_HEADER_LIMIT = 64512 + # Check that no item in `data` is larger than `HDF5_OBJECT_HEADER_LIMIT` + # because in that case even chunking the array would not make the saving + # possible. + bad_attributes = [x for x in data if len(x) > HDF5_OBJECT_HEADER_LIMIT] + + # Expecting this to never be true. + if bad_attributes: + raise RuntimeError( + "The following attributes cannot be saved to HDF5 file because " + f"they are larger than {HDF5_OBJECT_HEADER_LIMIT} " + f"bytes: {bad_attributes}" + ) + + data_npy = np.asarray(data) + + num_chunks = 1 + chunked_data = np.array_split(data_npy, num_chunks) + + # This will never loop forever thanks to the test above. + while any(x.nbytes > HDF5_OBJECT_HEADER_LIMIT for x in chunked_data): + num_chunks += 1 + chunked_data = np.array_split(data_npy, num_chunks) + + if num_chunks > 1: + for chunk_id, chunk_data in enumerate(chunked_data): + group.attrs["%s%d" % (name, chunk_id)] = chunk_data + else: + group.attrs[name] = data + + +def load_attributes_from_hdf5_group(group, name): + """Loads attributes of the specified name from the HDF5 group. + + This method deals with an inherent problem of HDF5 file which is not able to store data larger than + HDF5_OBJECT_HEADER_LIMIT bytes. + + Args: + group: A pointer to a HDF5 group. + name: A name of the attributes to load. + + Returns: + data: Attributes data. + + Copied from Keras to Transformers to avoid versioning issues. + """ + if name in group.attrs: + data = [n.decode("utf8") if hasattr(n, "decode") else n for n in group.attrs[name]] + else: + data = [] + chunk_id = 0 + while "%s%d" % (name, chunk_id) in group.attrs: + data.extend( + [n.decode("utf8") if hasattr(n, "decode") else n for n in group.attrs["%s%d" % (name, chunk_id)]] + ) + chunk_id += 1 + return data diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index 23214db8f85..59b8f34ab8d 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -399,8 +399,7 @@ def can_return_loss(model_class): model_class (`type`): The class of the model. """ base_classes = str(inspect.getmro(model_class)) - - if "keras.engine.training.Model" in base_classes: + if "TFPreTrainedModel" in base_classes: signature = inspect.signature(model_class.call) # TensorFlow models elif "torch.nn.modules.module.Module" in base_classes: signature = inspect.signature(model_class.forward) # PyTorch models @@ -423,8 +422,7 @@ def find_labels(model_class): """ model_name = model_class.__name__ base_classes = str(inspect.getmro(model_class)) - - if "keras.engine.training.Model" in base_classes: + if "TFPreTrainedModel" in base_classes: signature = inspect.signature(model_class.call) # TensorFlow models elif "torch.nn.modules.module.Module" in base_classes: signature = inspect.signature(model_class.forward) # PyTorch models From 65b64bdd0b53941785bb194d429709a571bc58aa Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 22 May 2023 19:52:29 +0100 Subject: [PATCH 2/5] Remove dummy print statement, move expand_1d --- src/transformers/modeling_tf_utils.py | 15 +-------------- src/transformers/tf_utils.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 3d22292661f..c58926f476c 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -38,7 +38,7 @@ from .configuration_utils import PretrainedConfig from .dynamic_module_utils import custom_object_save from .generation import GenerationConfig, TFGenerationMixin -from .tf_utils import load_attributes_from_hdf5_group, save_attributes_to_hdf5_group, shape_list +from .tf_utils import expand_1d, load_attributes_from_hdf5_group, save_attributes_to_hdf5_group, shape_list from .utils import ( DUMMY_INPUTS, SAFE_WEIGHTS_INDEX_NAME, @@ -101,18 +101,6 @@ ] -def expand_1d(data): - """Expands 1-dimensional `Tensor`s into 2-dimensional `Tensor`s. - Copied from Keras to here to avoid versioning issues.""" - - def _expand_single_1d_tensor(t): - if isinstance(t, tf.Tensor) and t.shape.rank == 1: - return tf.expand_dims(t, axis=-1) - return t - - return tf.nest.map_structure(_expand_single_1d_tensor, data) - - def dummy_loss(y_true, y_pred): if y_pred.shape.rank <= 1: return y_pred @@ -518,7 +506,6 @@ def input_processing(func, config, **kwargs): if isinstance(v, allowed_types) or v is None: output[k] = v else: - print() raise ValueError(f"Data of type {type(v)} is not allowed only {allowed_types} is accepted for {k}.") if isinstance(main_input, (tuple, list)): diff --git a/src/transformers/tf_utils.py b/src/transformers/tf_utils.py index f383e38dcc9..0900ac587c4 100644 --- a/src/transformers/tf_utils.py +++ b/src/transformers/tf_utils.py @@ -241,3 +241,15 @@ def load_attributes_from_hdf5_group(group, name): ) chunk_id += 1 return data + + +def expand_1d(data): + """Expands 1-dimensional `Tensor`s into 2-dimensional `Tensor`s. + Copied from Keras to here to avoid versioning issues.""" + + def _expand_single_1d_tensor(t): + if isinstance(t, tf.Tensor) and t.shape.rank == 1: + return tf.expand_dims(t, axis=-1) + return t + + return tf.nest.map_structure(_expand_single_1d_tensor, data) From f7e268790228d7dcb92febfc51906d7bc3d66c15 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 23 May 2023 15:12:17 +0100 Subject: [PATCH 3/5] Make a proper framework inference function --- src/transformers/utils/__init__.py | 1 + src/transformers/utils/generic.py | 30 ++++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 35d3638aecd..7169c7daf96 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -39,6 +39,7 @@ expand_dims, find_labels, flatten_dict, + infer_framework, is_jax_tensor, is_numpy_array, is_tensor, diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index 59b8f34ab8d..7f28ad0ebb0 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -398,10 +398,10 @@ def can_return_loss(model_class): Args: model_class (`type`): The class of the model. """ - base_classes = str(inspect.getmro(model_class)) - if "TFPreTrainedModel" in base_classes: + framework = infer_framework(model_class) + if framework == "tf": signature = inspect.signature(model_class.call) # TensorFlow models - elif "torch.nn.modules.module.Module" in base_classes: + elif framework == "pt": signature = inspect.signature(model_class.forward) # PyTorch models else: signature = inspect.signature(model_class.__call__) # Flax models @@ -421,10 +421,10 @@ def find_labels(model_class): model_class (`type`): The class of the model. """ model_name = model_class.__name__ - base_classes = str(inspect.getmro(model_class)) - if "TFPreTrainedModel" in base_classes: + framework = infer_framework(model_class) + if framework == "tf": signature = inspect.signature(model_class.call) # TensorFlow models - elif "torch.nn.modules.module.Module" in base_classes: + elif framework == "pt": signature = inspect.signature(model_class.forward) # PyTorch models else: signature = inspect.signature(model_class.__call__) # Flax models @@ -563,3 +563,21 @@ def add_model_info_to_auto_map(auto_map, repo_id): auto_map[key] = f"{repo_id}--{value}" return auto_map + + +def infer_framework(model_class): + """ + Infers the framework of a given model without using isinstance(), because we cannot guarantee that the relevant + classes are imported or available. + """ + for base_class in inspect.getmro(model_class): + module = base_class.__module__ + name = base_class.__name__ + if module.startswith("tensorflow") or module.startswith("keras") or name == "TFPreTrainedModel": + return "tf" + elif module.startswith("torch") or name == "PreTrainedModel": + return "pt" + elif module.startswith("flax") or module.startswith("jax") or name == "FlaxPreTrainedModel": + return "flax" + else: + raise ValueError(f"Could not infer framework from class {model_class}.") From aa3c9745a0e66f9f49237f450aa38dfe5521851a Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 23 May 2023 15:16:00 +0100 Subject: [PATCH 4/5] Make a proper framework inference function --- src/transformers/pipelines/base.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 893822b157f..f57d08efffd 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -15,7 +15,6 @@ import collections import csv import importlib -import inspect import json import os import pickle @@ -36,7 +35,7 @@ from ..modelcard import ModelCard from ..models.auto.configuration_auto import AutoConfig from ..tokenization_utils import PreTrainedTokenizer -from ..utils import ModelOutput, add_end_docstrings, is_tf_available, is_torch_available, logging +from ..utils import ModelOutput, add_end_docstrings, infer_framework, is_tf_available, is_torch_available, logging GenericTensor = Union[List["GenericTensor"], "torch.Tensor", "tf.Tensor"] @@ -278,7 +277,7 @@ def infer_framework_load_model( if isinstance(model, str): raise ValueError(f"Could not load model {model} with any of the following classes: {class_tuple}.") - framework = "tf" if "TFPreTrainedModel" in str(inspect.getmro(model.__class__)) else "pt" + framework = infer_framework(model.__class__) return framework, model @@ -351,7 +350,7 @@ def get_framework(model, revision: Optional[str] = None): except OSError: model = TFAutoModel.from_pretrained(model, revision=revision) - framework = "tf" if "TFPreTrainedModel" in str(inspect.getmro(model.__class__)) else "pt" + framework = infer_framework(model.__class__) return framework From b9d7725385afbd522d767c844a95fe774ead8873 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 23 May 2023 15:18:10 +0100 Subject: [PATCH 5/5] ValueError -> TypeError --- src/transformers/utils/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index 7f28ad0ebb0..afe10240837 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -580,4 +580,4 @@ def infer_framework(model_class): elif module.startswith("flax") or module.startswith("jax") or name == "FlaxPreTrainedModel": return "flax" else: - raise ValueError(f"Could not infer framework from class {model_class}.") + raise TypeError(f"Could not infer framework from class {model_class}.")