Remove static pretrained maps from the library's internals #29112

Merged · 13 commits · Mar 25, 2024
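This PR removes the static `*_PRETRAINED_CONFIG_ARCHIVE_MAP` and `*_PRETRAINED_MODEL_ARCHIVE_LIST` constants from the individual model modules and re-exports them from a central `models/deprecated/_archive_maps.py`, so old import paths keep working while the Hub becomes the source of truth for available checkpoints. A minimal sketch of what such a shim can look like, assuming a wrapper that warns on first access (names and message are illustrative, not the PR's exact code):

```python
import warnings


class DeprecatedList(list):
    """A list that emits a FutureWarning the first time it is read."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._warned = False

    def _warn_once(self):
        if not self._warned:
            warnings.warn(
                "Archive maps are deprecated and will be removed in a future "
                "version; query the Hugging Face Hub for available checkpoints instead.",
                FutureWarning,
            )
            self._warned = True

    def __getitem__(self, index):
        self._warn_once()
        return super().__getitem__(index)

    def __iter__(self):
        self._warn_once()
        return super().__iter__()


# An old constant, now defined once in the central module rather than in
# every model file (entries abbreviated).
ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = DeprecatedList(
    ["albert/albert-base-v1", "albert/albert-base-v2"]
)
```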
examples/research_projects/bertabs/modeling_bertabs.py (4 changes: 0 additions & 4 deletions)
@@ -33,10 +33,6 @@

MAX_SIZE = 5000

-BERTABS_FINETUNED_MODEL_ARCHIVE_LIST = [
-    "remi/bertabs-finetuned-cnndm-extractive-abstractive-summarization",
-]
-

class BertAbsPreTrainedModel(PreTrainedModel):
config_class = BertAbsConfig
src/transformers/__init__.py (6 changes: 2 additions & 4 deletions)
@@ -726,7 +726,7 @@
"ProphetNetTokenizer",
    ],
    "models.pvt": ["PVT_PRETRAINED_CONFIG_ARCHIVE_MAP", "PvtConfig"],
-    "models.pvt_v2": ["PVT_V2_PRETRAINED_CONFIG_ARCHIVE_MAP", "PvtV2Config"],
+    "models.pvt_v2": ["PvtV2Config"],
"models.qdqbert": ["QDQBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "QDQBertConfig"],
"models.qwen2": [
"QWEN2_PRETRAINED_CONFIG_ARCHIVE_MAP",
@@ -3050,7 +3050,6 @@
)
_import_structure["models.pvt_v2"].extend(
        [
-            "PVT_V2_PRETRAINED_MODEL_ARCHIVE_LIST",
"PvtV2Backbone",
"PvtV2ForImageClassification",
"PvtV2Model",
@@ -5602,7 +5601,7 @@
ProphetNetTokenizer,
)
from .models.pvt import PVT_PRETRAINED_CONFIG_ARCHIVE_MAP, PvtConfig
-    from .models.pvt_v2 import PVT_V2_PRETRAINED_CONFIG_ARCHIVE_MAP, PvtV2Config
+    from .models.pvt_v2 import PvtV2Config
from .models.qdqbert import QDQBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, QDQBertConfig
from .models.qwen2 import QWEN2_PRETRAINED_CONFIG_ARCHIVE_MAP, Qwen2Config, Qwen2Tokenizer
from .models.rag import RagConfig, RagRetriever, RagTokenizer
@@ -7623,7 +7622,6 @@
PvtPreTrainedModel,
)
from .models.pvt_v2 import (
-        PVT_V2_PRETRAINED_MODEL_ARCHIVE_LIST,
PvtV2Backbone,
PvtV2ForImageClassification,
PvtV2Model,
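Note that each removed symbol has to disappear from two places that mirror each other: the `_import_structure` dictionary that drives lazy loading and the `TYPE_CHECKING` imports further down. A minimal sketch of the lazy pattern these entries feed, using a module-level `__getattr__` (PEP 562) for simplicity; the real `__init__.py` uses a `_LazyModule` helper:

```python
import importlib

# Maps a submodule path to the names it exports; the pvt_v2 entry as it
# looks after this PR, with the archive-map constant gone.
_import_structure = {
    "models.pvt_v2": ["PvtV2Config"],
}


def __getattr__(name):
    # Called only for attributes not found through normal lookup.
    for module, exports in _import_structure.items():
        if name in exports:
            return getattr(importlib.import_module(f".{module}", __name__), name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```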
src/transformers/commands/add_new_model_like.py (52 changes: 1 addition & 51 deletions)
@@ -527,35 +527,6 @@ def duplicate_module(
# Loop and treat all objects
new_objects = []
for obj in objects:
-        # Special cases
-        if "PRETRAINED_CONFIG_ARCHIVE_MAP = {" in obj:
-            # docstyle-ignore
-            obj = (
-                f"{new_model_patterns.model_upper_cased}_PRETRAINED_CONFIG_ARCHIVE_MAP = "
-                + "{"
-                + f"""
-    "{new_model_patterns.checkpoint}": "https://huggingface.co/{new_model_patterns.checkpoint}/resolve/main/config.json",
-"""
-                + "}\n"
-            )
-            new_objects.append(obj)
-            continue
-        elif "PRETRAINED_MODEL_ARCHIVE_LIST = [" in obj:
-            if obj.startswith("TF_"):
-                prefix = "TF_"
-            elif obj.startswith("FLAX_"):
-                prefix = "FLAX_"
-            else:
-                prefix = ""
-            # docstyle-ignore
-            obj = f"""{prefix}{new_model_patterns.model_upper_cased}_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "{new_model_patterns.checkpoint}",
-    # See all {new_model_patterns.model_name} models at https://huggingface.co/models?filter={new_model_patterns.model_type}
-]
-"""
-            new_objects.append(obj)
-            continue
-
special_pattern = False
for pattern, attr in SPECIAL_PATTERNS.items():
if pattern in obj:
@@ -785,7 +756,6 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None):

model_name = auto_module.MODEL_NAMES_MAPPING[model_type]
config_class = auto_module.configuration_auto.CONFIG_MAPPING_NAMES[model_type]
-    archive_map = auto_module.configuration_auto.CONFIG_ARCHIVE_MAP_MAPPING_NAMES.get(model_type, None)
if model_type in auto_module.tokenization_auto.TOKENIZER_MAPPING_NAMES:
tokenizer_classes = auto_module.tokenization_auto.TOKENIZER_MAPPING_NAMES[model_type]
tokenizer_class = tokenizer_classes[0] if tokenizer_classes[0] is not None else tokenizer_classes[1]
@@ -814,19 +784,7 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None):

model_classes = retrieve_model_classes(model_type, frameworks=frameworks)

-    # Retrieve model upper-cased name from the constant name of the pretrained archive map.
-    if archive_map is None:
-        model_upper_cased = model_camel_cased.upper()
-    else:
-        parts = archive_map.split("_")
-        idx = 0
-        while idx < len(parts) and parts[idx] != "PRETRAINED":
-            idx += 1
-        if idx < len(parts):
-            model_upper_cased = "_".join(parts[:idx])
-        else:
-            model_upper_cased = model_camel_cased.upper()
-
+    model_upper_cased = model_camel_cased.upper()
model_patterns = ModelPatterns(
model_name,
checkpoint=find_base_model_checkpoint(model_type, model_files=model_files),
@@ -1135,14 +1093,6 @@ def add_model_to_auto_classes(
for attr in ["model_type", "model_name"]:
old_model_line = old_model_line.replace("{" + attr + "}", getattr(old_model_patterns, attr))
new_model_line = new_model_line.replace("{" + attr + "}", getattr(new_model_patterns, attr))
if "pretrained_archive_map" in pattern:
old_model_line = old_model_line.replace(
"{pretrained_archive_map}", f"{old_model_patterns.model_upper_cased}_PRETRAINED_CONFIG_ARCHIVE_MAP"
)
new_model_line = new_model_line.replace(
"{pretrained_archive_map}", f"{new_model_patterns.model_upper_cased}_PRETRAINED_CONFIG_ARCHIVE_MAP"
)

new_model_line = new_model_line.replace(
old_model_patterns.model_camel_cased, new_model_patterns.model_camel_cased
)
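One behavioral nuance of the `model_upper_cased` simplification above: the old code recovered the name from the archive-map constant, which preserved underscores between words, while the new code upper-cases the camel-cased name directly, which drops them. A quick illustration with a made-up multi-word model:

```python
archive_map = "XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP"  # example constant name
model_camel_cased = "XLMRoberta"

# Old derivation: everything before "PRETRAINED" in the constant name.
parts = archive_map.split("_")
idx = 0
while idx < len(parts) and parts[idx] != "PRETRAINED":
    idx += 1
old_name = "_".join(parts[:idx]) if idx < len(parts) else model_camel_cased.upper()

# New derivation: upper-case the camel-cased name directly.
new_name = model_camel_cased.upper()

print(old_name)  # XLM_ROBERTA
print(new_name)  # XLMROBERTA
```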
src/transformers/convert_pytorch_checkpoint_to_tf2.py (50 changes: 0 additions & 50 deletions)
@@ -19,28 +19,6 @@
import os

from . import (
-    ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    BART_PRETRAINED_MODEL_ARCHIVE_LIST,
-    BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
-    DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
-    DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST,
-    ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    LAYOUTLM_PRETRAINED_MODEL_ARCHIVE_LIST,
-    LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    WAV_2_VEC_2_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
-    XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
AlbertConfig,
BartConfig,
BertConfig,
@@ -140,31 +118,26 @@
TFBartForConditionalGeneration,
TFBartForSequenceClassification,
BartForConditionalGeneration,
-        BART_PRETRAINED_MODEL_ARCHIVE_LIST,
),
"bert": (
BertConfig,
TFBertForPreTraining,
BertForPreTraining,
-        BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"google-bert/bert-large-uncased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
-        BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"google-bert/bert-large-cased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
-        BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"google-bert/bert-base-cased-finetuned-mrpc": (
BertConfig,
TFBertForSequenceClassification,
BertForSequenceClassification,
-        BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"dpr": (
DPRConfig,
@@ -174,130 +147,107 @@
DPRQuestionEncoder,
DPRContextEncoder,
DPRReader,
-        DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
-        DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
-        DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST,
),
"openai-community/gpt2": (
GPT2Config,
TFGPT2LMHeadModel,
GPT2LMHeadModel,
-        GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"xlnet": (
XLNetConfig,
TFXLNetLMHeadModel,
XLNetLMHeadModel,
-        XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"xlm": (
XLMConfig,
TFXLMWithLMHeadModel,
XLMWithLMHeadModel,
-        XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"xlm-roberta": (
XLMRobertaConfig,
TFXLMRobertaForMaskedLM,
XLMRobertaForMaskedLM,
-        XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"transfo-xl": (
TransfoXLConfig,
TFTransfoXLLMHeadModel,
TransfoXLLMHeadModel,
-        TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"openai-community/openai-gpt": (
OpenAIGPTConfig,
TFOpenAIGPTLMHeadModel,
OpenAIGPTLMHeadModel,
-        OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"roberta": (
RobertaConfig,
TFRobertaForCausalLM,
TFRobertaForMaskedLM,
RobertaForMaskedLM,
-        ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"layoutlm": (
LayoutLMConfig,
TFLayoutLMForMaskedLM,
LayoutLMForMaskedLM,
-        LAYOUTLM_PRETRAINED_MODEL_ARCHIVE_LIST,
),
"FacebookAI/roberta-large-mnli": (
RobertaConfig,
TFRobertaForSequenceClassification,
RobertaForSequenceClassification,
-        ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"camembert": (
CamembertConfig,
TFCamembertForMaskedLM,
CamembertForMaskedLM,
-        CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"flaubert": (
FlaubertConfig,
TFFlaubertWithLMHeadModel,
FlaubertWithLMHeadModel,
-        FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"distilbert": (
DistilBertConfig,
TFDistilBertForMaskedLM,
DistilBertForMaskedLM,
-        DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"distilbert-base-distilled-squad": (
DistilBertConfig,
TFDistilBertForQuestionAnswering,
DistilBertForQuestionAnswering,
-        DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"lxmert": (
LxmertConfig,
TFLxmertForPreTraining,
LxmertForPreTraining,
-        LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"lxmert-visual-feature-encoder": (
LxmertConfig,
TFLxmertVisualFeatureEncoder,
LxmertVisualFeatureEncoder,
-        LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"Salesforce/ctrl": (
CTRLConfig,
TFCTRLLMHeadModel,
CTRLLMHeadModel,
-        CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"albert": (
AlbertConfig,
TFAlbertForPreTraining,
AlbertForPreTraining,
-        ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"t5": (
T5Config,
TFT5ForConditionalGeneration,
T5ForConditionalGeneration,
-        T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"electra": (
ElectraConfig,
TFElectraForPreTraining,
ElectraForPreTraining,
-        ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"wav2vec2": (
Wav2Vec2Config,
TFWav2Vec2Model,
Wav2Vec2Model,
-        WAV_2_VEC_2_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
}

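After this change each `MODEL_CLASSES` entry carries only the config class and the model classes, with no trailing archive map or list. A quick sanity check of the new tuple shape (assuming a TensorFlow-enabled install, since this module imports TF classes at the top level):

```python
from transformers.convert_pytorch_checkpoint_to_tf2 import MODEL_CLASSES

# The "bert" entry is now exactly (config class, TF model, PyTorch model).
config_class, tf_model_class, pt_model_class = MODEL_CLASSES["bert"]
print(config_class.__name__)    # BertConfig
print(tf_model_class.__name__)  # TFBertForPreTraining
print(pt_model_class.__name__)  # BertForPreTraining
```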
src/transformers/models/albert/configuration_albert.py (13 changes: 1 addition & 12 deletions)
@@ -19,18 +19,7 @@

from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig
-
-
-ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "albert/albert-base-v1": "https://huggingface.co/albert/albert-base-v1/resolve/main/config.json",
-    "albert/albert-large-v1": "https://huggingface.co/albert/albert-large-v1/resolve/main/config.json",
-    "albert/albert-xlarge-v1": "https://huggingface.co/albert/albert-xlarge-v1/resolve/main/config.json",
-    "albert/albert-xxlarge-v1": "https://huggingface.co/albert/albert-xxlarge-v1/resolve/main/config.json",
-    "albert/albert-base-v2": "https://huggingface.co/albert/albert-base-v2/resolve/main/config.json",
-    "albert/albert-large-v2": "https://huggingface.co/albert/albert-large-v2/resolve/main/config.json",
-    "albert/albert-xlarge-v2": "https://huggingface.co/albert/albert-xlarge-v2/resolve/main/config.json",
-    "albert/albert-xxlarge-v2": "https://huggingface.co/albert/albert-xxlarge-v2/resolve/main/config.json",
-}
+from ..deprecated._archive_maps import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP  # noqa: F401, E402


class AlbertConfig(PretrainedConfig):
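Downstream code that imported the constant from its old location keeps working, since the name is re-exported from the deprecated module. A quick check, sketched under the assumption that the shim behaves like a plain mapping and may emit a `FutureWarning` on access:

```python
import warnings

from transformers.models.albert.configuration_albert import (
    ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    urls = list(ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP.values())

print(len(urls))  # 8 checkpoints, matching the dict removed above
print(any(issubclass(w.category, FutureWarning) for w in caught))
```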
src/transformers/models/albert/modeling_albert.py (12 changes: 1 addition & 11 deletions)
@@ -52,17 +52,7 @@
_CONFIG_FOR_DOC = "AlbertConfig"


-ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "albert/albert-base-v1",
-    "albert/albert-large-v1",
-    "albert/albert-xlarge-v1",
-    "albert/albert-xxlarge-v1",
-    "albert/albert-base-v2",
-    "albert/albert-large-v2",
-    "albert/albert-xlarge-v2",
-    "albert/albert-xxlarge-v2",
-    # See all ALBERT models at https://huggingface.co/models?filter=albert
-]
+from ..deprecated._archive_maps import ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST  # noqa: F401, E402


def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
src/transformers/models/albert/modeling_tf_albert.py (13 changes: 2 additions & 11 deletions)
@@ -65,17 +65,8 @@
_CHECKPOINT_FOR_DOC = "albert/albert-base-v2"
_CONFIG_FOR_DOC = "AlbertConfig"

-TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "albert/albert-base-v1",
-    "albert/albert-large-v1",
-    "albert/albert-xlarge-v1",
-    "albert/albert-xxlarge-v1",
-    "albert/albert-base-v2",
-    "albert/albert-large-v2",
-    "albert/albert-xlarge-v2",
-    "albert/albert-xxlarge-v2",
-    # See all ALBERT models at https://huggingface.co/models?filter=albert
-]
+
+from ..deprecated._archive_maps import TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST  # noqa: F401, E402


class TFAlbertPreTrainingLoss: