add Transformer XL model #258

Merged (6 commits, Sep 26, 2023)
54 changes: 54 additions & 0 deletions training/benchmarks/transformer_xl/README.md
@@ -0,0 +1,54 @@
## 1. Model Information

- Model introduction

The Transformer-XL model was proposed in [Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context](https://arxiv.org/abs/1901.02860) by Zihang Dai, Zhilin Yang, Yiming Yang, Jaime Carbonell, Quoc V. Le, and Ruslan Salakhutdinov. It is a causal (uni-directional) transformer with relative (sinusoidal) positional embeddings that can reuse previously computed hidden states to attend to a longer context (memory). The model also uses adaptive softmax inputs and outputs (tied).
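
As a short, hedged sketch (not part of this PR), the memory mechanism can be illustrated with the Hugging Face transformers v4.33 API referenced below: the hidden states returned as `mems` by one forward pass are fed back into the next pass, so each segment attends to the previous ones.

```python
# Hedged sketch: how Transformer-XL carries memory across segments.
import torch
from transformers import TransfoXLConfig, TransfoXLLMHeadModel

# Default (WikiText-103-sized) config; heavy but illustrative.
model = TransfoXLLMHeadModel(TransfoXLConfig(mem_len=150)).eval()

segments = torch.randint(0, 1000, (3, 1, 150))  # three consecutive 150-token segments
mems = None  # the first segment starts with no memory
for segment in segments:
    with torch.no_grad():
        outputs = model(input_ids=segment, mems=mems)
    mems = outputs.mems  # cached hidden states, attended to by the next segment
```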

- Paper

[Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context](https://arxiv.org/abs/1901.02860)

- Model code source

Pytorch case:
This repository includes software from https://github.com/huggingface/transformers/tree/v4.33.0
licensed under the Apache License 2.0.

Some of the files in this directory were modified by BAAI in 2023 to support FlagPerf.


## 2. Dataset

https://paperswithcode.com/dataset/wikitext-103

The WikiText language modeling dataset is a collection of over 100 million tokens extracted from the set of verified Good and Featured articles on Wikipedia. The dataset is available under the Creative Commons Attribution-ShareAlike License.

The dataset is available on Hugging Face: https://huggingface.co/datasets/wikitext. The tokenizer is available at https://huggingface.co/transfo-xl-wt103.

The dataset should be organized as follows:

```
data_dir/
├── data
│   ├── LICENSE
│   ├── dataset_info.json
│   ├── wikitext-test.arrow
│   ├── wikitext-train.arrow
│   └── wikitext-validation.arrow
└── model
   ├── config.json
   ├── pytorch_model.bin
   ├── vocab.bin
   └── vocab.pkl
```
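
As a hedged sketch (not part of this PR), one way to populate this layout with the Hugging Face `datasets` and `transformers` libraries (assuming transformers v4.33 as above) is shown below; the `data_dir` path is hypothetical and the exact `.arrow` file names produced by the cache may differ slightly from the tree above.

```python
from datasets import load_dataset
from transformers import AutoTokenizer, TransfoXLLMHeadModel

data_dir = "/path/to/data_dir"  # hypothetical location

# Download and cache WikiText-103 under data_dir/data.
load_dataset("wikitext", "wikitext-103-raw-v1", cache_dir=f"{data_dir}/data")

# Save the pretrained tokenizer and model files (config.json, pytorch_model.bin,
# vocab files, ...) under data_dir/model.
AutoTokenizer.from_pretrained("transfo-xl-wt103").save_pretrained(f"{data_dir}/model")
TransfoXLLMHeadModel.from_pretrained("transfo-xl-wt103").save_pretrained(f"{data_dir}/model")
```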


## 3. Framework and Chip Support

- FlagPerf currently provides a Pytorch implementation of this model.
- The chips that have been adapted to this model are listed below:

| | *Pytorch* | *Paddle* | *TensorFlow2* |
| ------------ | --------- | -------- | ------------- |
| *Nvidia GPU* | *✅* | *N/A* | *N/A* |
| *Kunlunxin XPU* | *N/A* | *N/A* | *N/A* |
16 changes: 16 additions & 0 deletions training/benchmarks/transformer_xl/pytorch/config/__init__.py
@@ -0,0 +1,16 @@
# Copyright (c) 2023 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ._base import *
from .mutable_params import mutable_params
75 changes: 75 additions & 0 deletions training/benchmarks/transformer_xl/pytorch/config/_base.py
@@ -0,0 +1,75 @@
# Copyright (c) 2023 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# DO NOT MODIFY THESE REQUIRED PARAMETERS

# Required parameters
vendor: str = None
data_dir: str = None
name: str = "transformer_xl"
cudnn_benchmark: bool = False
cudnn_deterministic: bool = True

# Optional parameters

# =========================================================
# optimizer && lr schedule
# =========================================================
lr: float = 0.00025
max_step: int = 200000
eta_min: float = 0.0
warmup_step: int = 0
weight_decay: float = 0.0
tgt_len: int = 150

# =========================================================
# train && evaluate
# =========================================================
train_batch_size: int = 60
eval_batch_size: int = 60

max_steps: int = None
max_epoch: int = 10
target_ppl: float = 55

do_train: bool = True
distributed: bool = True


# =========================================================
# utils
# =========================================================
seed: int = 42
num_epochs_to_generate_seeds_for: int = 2
dist_backend: str = 'nccl'
device: str = None

# =========================================================
# datasets
# =========================================================
dataloader_drop_last: bool = False
dataloader_num_workers: int = 8
dataset_config_name: str = "wikitext-103-raw-v1"

# =========================================================
# for driver
# =========================================================
local_rank: int = -1
use_env: bool = True
log_freq: int = 100
print_freq: int = 100
n_device: int = 1
sync_bn: bool = False
gradient_accumulation_steps: int = 1
20 changes: 20 additions & 0 deletions training/benchmarks/transformer_xl/pytorch/config/mutable_params.py
@@ -0,0 +1,20 @@
# Copyright (c) 2023 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


mutable_params = [
    'vendor', 'data_dir', 'lr', 'weight_decay', 'train_batch_size',
    'eval_batch_size', 'do_train', 'distributed', 'dist_backend', 'device',
    'cudnn_benchmark', 'cudnn_deterministic'
]
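
As a hedged illustration (not part of this PR; the helper name `apply_overrides` is hypothetical), a whitelist like `mutable_params` is typically consumed by copying only the listed keys from a vendor- or test-specific config onto the defaults defined in `_base.py`:

```python
from types import SimpleNamespace

def apply_overrides(base_config, overrides, mutable_params):
    """Copy only whitelisted keys from `overrides` onto `base_config`."""
    for key, value in overrides.items():
        if key in mutable_params:
            setattr(base_config, key, value)
    return base_config

# Example: a run config may change the learning rate and batch size,
# but not fields outside the whitelist such as `name`.
base = SimpleNamespace(name="transformer_xl", lr=0.00025, train_batch_size=60)
cfg = apply_overrides(base, {"lr": 1e-4, "train_batch_size": 32, "name": "x"},
                      ["lr", "train_batch_size"])
print(cfg.lr, cfg.train_batch_size, cfg.name)  # 0.0001 32 transformer_xl
```
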
102 changes: 102 additions & 0 deletions training/benchmarks/transformer_xl/pytorch/dataset/__init__.py
@@ -0,0 +1,102 @@
# Copyright (c) 2023 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
from itertools import chain

import numpy as np
import torch

from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from transformers.data import default_data_collator


class WorkerInitializer(object):
    _instance = None

    def __init__(self, seed):
        self.seed = seed

    def __call__(self, idx):
        np.random.seed(seed=self.seed + idx)
        random.seed(self.seed + idx)

    @classmethod
    def default(cls, seed=0):
        if cls._instance is None:
            cls._instance = cls(seed)
        return cls._instance


def create_sampler(dataset):
    if torch.distributed.is_initialized():
        world_size = torch.distributed.get_world_size()
        rank = torch.distributed.get_rank()
        return torch.utils.data.distributed.DistributedSampler(
            dataset, num_replicas=world_size, rank=rank)
    else:
        return torch.utils.data.RandomSampler(dataset)


def create_dataset(config, data_dir=None, config_name=None, model_dir=None):
    data_dir = data_dir or config.data_dir + "/data"
    config_name = config_name or config.dataset_config_name
    model_dir = model_dir or config.data_dir + "/model"
    raw_dataset = load_dataset(data_dir, config_name)
    tokenizer = AutoTokenizer.from_pretrained(model_dir)

    def tokenize(examples):
        return tokenizer(examples["text"])

    tokenized_datasets = raw_dataset.map(
        tokenize,
        batched=True,
        remove_columns=["text"],
        load_from_cache_file=True,
        desc="running tokenizer on dataset",
    )

    block_size = config.tgt_len

    def group_texts(examples):
        # Concatenate every tokenized field in the batch, then re-split it into
        # contiguous chunks of block_size (= config.tgt_len) tokens, dropping the
        # trailing remainder so all examples share the same length.
        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
        total_length = len(concatenated_examples[list(examples.keys())[0]])
        total_length = (total_length // block_size) * block_size
        result = {
            k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
            for k, t in concatenated_examples.items()
        }
        # For causal language modeling, the labels are a copy of the inputs.
        result["labels"] = result["input_ids"].copy()
        return result

    lm_datasets = tokenized_datasets.map(
        group_texts,
        batched=True,
    )

    train_dataset = DataLoader(
        lm_datasets["train"],
        sampler=create_sampler(lm_datasets["train"]),
        batch_size=config.train_batch_size,
        collate_fn=default_data_collator,
    )
    test_dataset = DataLoader(
        lm_datasets["test"],
        sampler=create_sampler(lm_datasets["test"]),
        batch_size=config.eval_batch_size,
        collate_fn=default_data_collator,
    )
    return train_dataset, test_dataset
51 changes: 51 additions & 0 deletions training/benchmarks/transformer_xl/pytorch/model/__init__.py
@@ -0,0 +1,51 @@
# Copyright (c) 2023 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import torch.distributed
from torch.nn.parallel import DistributedDataParallel

from transformers import TransfoXLConfig, TransfoXLLMHeadModel, TransfoXLTokenizer


def _model_to_ddp(model, config):
    if torch.distributed.is_initialized():
        return DistributedDataParallel(model,
                                       find_unused_parameters=True,
                                       device_ids=[config.local_rank])
    return model


def create_model(config, device):
    hfconfig = TransfoXLConfig(
        n_layer=16,
        d_model=410,
        d_embed=410,
        n_head=10,
        d_head=41,
        d_inner=2100,
        dropout=0.1,
        dropatt=0.0,
        mem_len=150,
    )
    model = TransfoXLLMHeadModel(hfconfig).to(device)
    model.post_init()
    # model = TransfoXLLMHeadModel.from_pretrained(config.data_dir+"/model").to(device)
    model = _model_to_ddp(model, config)
    tokenizer = TransfoXLTokenizer()
    return model, hfconfig, tokenizer
20 changes: 20 additions & 0 deletions training/benchmarks/transformer_xl/pytorch/optimizer/__init__.py
@@ -0,0 +1,20 @@
# Copyright (c) 2023 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from torch.optim import Adam

def create_optimizer(model, config):
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = Adam(params, lr=config.lr)
    return optimizer
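
As a hedged sketch (not part of this PR; the function name `train_one_step` is hypothetical) of how the helpers above might be wired together, the step below runs one optimizer update. Distributed setup, the LR schedule, and evaluation are omitted, and the `outputs.losses` field name is assumed from the transformers v4.33 `TransfoXLLMHeadModel` API.

```python
import torch

def train_one_step(model, batch, optimizer, device):
    # Move the collated batch (input_ids, labels, ...) to the target device.
    batch = {k: v.to(device) for k, v in batch.items()}
    outputs = model(input_ids=batch["input_ids"], labels=batch["labels"])
    # TransfoXLLMHeadModel returns unreduced per-token losses; reduce to a scalar.
    loss = outputs.losses.mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()
```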