diff --git a/LICENSE b/LICENSE
index 7a4a3ea2..0f76b3f8 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,3 +1,4 @@
+Copyright © 2022 BAAI. All rights reserved.
Apache License
Version 2.0, January 2004
@@ -175,28 +176,3 @@
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
\ No newline at end of file
diff --git a/README.md b/README.md
index c3172981..8c549251 100644
--- a/README.md
+++ b/README.md
@@ -152,20 +152,13 @@ Thanks for your interest in contributing! There are many ways to get involved;
start with our [contributor guidelines](CONTRIBUTING.md) and then
check these [open issues](https://github.com/BAAI-WuDao/Sailing/issues) for specific tasks.
+## Contact us
+Scan the WeChat QR code
-## [License](/LICENSE)
-```
-Copyright [2022] [BAAI]
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+
-http://www.apache.org/licenses/LICENSE-2.0
+## [License](/LICENSE)
+The majority of FlagAI is licensed under the [Apache 2.0 license](LICENSE); however, portions of the project are available under separate license terms:
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-```
+* Megatron-LM is licensed under the [Megatron-LM license](https://github.com/NVIDIA/Megatron-LM/blob/main/LICENSE)
+* GLM is licensed under the [MIT license](https://github.com/THUDM/GLM/blob/main/LICENSE)
\ No newline at end of file
diff --git a/README_zh.md b/README_zh.md
index af4f1fdd..3e01ee80 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -213,20 +213,14 @@ for text_pair in test_data:
# 贡献代码
感谢您对贡献的兴趣! 参与的方式有很多; 从我们的[贡献者指南](CONTRIBUTING.md) 开始,然后检查这些[未解决的问题](https://github.com/BAAI-WuDao/Sailing/issues)以执行特定任务。
+# 联系我们
+欢迎扫码加入FlagAI用户群
-# [许可证](/LICENSE)
-```
-Copyright [2022] [BAAI]
+
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
+# [许可证](/LICENSE)
+大部分的FlagAI项目是基于[Apache 2.0 license](LICENSE), 但是部分的代码是基于其他的协议:
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-```
+* Megatron-LM 是基于协议[Megatron-LM license](https://github.com/NVIDIA/Megatron-LM/blob/main/LICENSE)
+* GLM 是基于协议[MIT license](https://github.com/THUDM/GLM/blob/main/LICENSE)
diff --git a/examples/bert_title_generation_english/generate.py b/examples/bert_title_generation_english/generate.py
index 9210c5fc..007a8431 100755
--- a/examples/bert_title_generation_english/generate.py
+++ b/examples/bert_title_generation_english/generate.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/bert_title_generation_english/train.py b/examples/bert_title_generation_english/train.py
index 555adb87..8bd40899 100644
--- a/examples/bert_title_generation_english/train.py
+++ b/examples/bert_title_generation_english/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import os
import torch
from torch.utils.data import Dataset
diff --git a/examples/glm_blank_filling/glm_generate_samples.py b/examples/glm_blank_filling/glm_generate_samples.py
index 043928b6..3c0e17ef 100755
--- a/examples/glm_blank_filling/glm_generate_samples.py
+++ b/examples/glm_blank_filling/glm_generate_samples.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/examples/glm_poetry_generation/generate.py b/examples/glm_poetry_generation/generate.py
index 1c1e86b3..6bb6711d 100755
--- a/examples/glm_poetry_generation/generate.py
+++ b/examples/glm_poetry_generation/generate.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/glm_poetry_generation/train.py b/examples/glm_poetry_generation/train.py
index 101c387e..b6ca393b 100644
--- a/examples/glm_poetry_generation/train.py
+++ b/examples/glm_poetry_generation/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import os
import torch
from torch.utils.data import Dataset
diff --git a/examples/glm_pretrain/train.py b/examples/glm_pretrain/train.py
index 4a92d099..541e6fb4 100644
--- a/examples/glm_pretrain/train.py
+++ b/examples/glm_pretrain/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.data.tokenizer import GLMLargeChTokenizer
from flagai.model.glm_model import GLMForSeq2Seq
from flagai.trainer import Trainer
diff --git a/examples/glm_seq2seq/train.py b/examples/glm_seq2seq/train.py
index dcd8f06e..8cc14c3a 100644
--- a/examples/glm_seq2seq/train.py
+++ b/examples/glm_seq2seq/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.trainer import Trainer
from flagai.model.glm_model import GLMForSeq2Seq
from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer
diff --git a/examples/glm_superglue/train.py b/examples/glm_superglue/train.py
index d337d432..e4fc9bd1 100644
--- a/examples/glm_superglue/train.py
+++ b/examples/glm_superglue/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.trainer import Trainer
from flagai.model.glm_model import GLMForSingleTokenCloze, GLMForMultiTokenCloze, GLMForMultiTokenClozeFast
from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer
diff --git a/examples/glm_superglue/train_10b_clue.py b/examples/glm_superglue/train_10b_clue.py
index 0baa19b4..70858740 100644
--- a/examples/glm_superglue/train_10b_clue.py
+++ b/examples/glm_superglue/train_10b_clue.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.trainer import Trainer
from flagai.model.glm_model import GLMForSingleTokenCloze
from flagai.data.tokenizer import GLMLargeChTokenizer
diff --git a/examples/glm_superglue/train_10b_superglue.py b/examples/glm_superglue/train_10b_superglue.py
index 56efdbc1..b98847ef 100644
--- a/examples/glm_superglue/train_10b_superglue.py
+++ b/examples/glm_superglue/train_10b_superglue.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.trainer import Trainer
from flagai.model.glm_model import GLMForSingleTokenCloze
from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer
diff --git a/examples/glm_superglue/train_prefix.py b/examples/glm_superglue/train_prefix.py
index dd374a89..9358c28a 100644
--- a/examples/glm_superglue/train_prefix.py
+++ b/examples/glm_superglue/train_prefix.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.trainer import Trainer
from flagai.model.glm_model import GLMForSingleTokenCloze, GLMForMultiTokenCloze, GLMForMultiTokenClozeFast, GLMForSequenceClassification
from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer
diff --git a/examples/glm_title_generation/generate.py b/examples/glm_title_generation/generate.py
index a16044cc..a143c8b6 100644
--- a/examples/glm_title_generation/generate.py
+++ b/examples/glm_title_generation/generate.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/glm_title_generation/train.py b/examples/glm_title_generation/train.py
index 35823d2c..cbd47b60 100644
--- a/examples/glm_title_generation/train.py
+++ b/examples/glm_title_generation/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import os
import numpy as np
import torch
diff --git a/examples/gpt2_text_writting/generate.py b/examples/gpt2_text_writting/generate.py
index 544d43fd..cec5bc86 100644
--- a/examples/gpt2_text_writting/generate.py
+++ b/examples/gpt2_text_writting/generate.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/gpt2_title_generation/generate.py b/examples/gpt2_title_generation/generate.py
index 1dc8eb2b..488009f7 100755
--- a/examples/gpt2_title_generation/generate.py
+++ b/examples/gpt2_title_generation/generate.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/gpt2_title_generation/train.py b/examples/gpt2_title_generation/train.py
index 8e36fc7c..31d0efc8 100644
--- a/examples/gpt2_title_generation/train.py
+++ b/examples/gpt2_title_generation/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import sys
sys.path.append('/data/liuguang/Sailing')
diff --git a/examples/gpt2_title_generation/train_multi_gpu.py b/examples/gpt2_title_generation/train_multi_gpu.py
index fcffdea1..6f938171 100644
--- a/examples/gpt2_title_generation/train_multi_gpu.py
+++ b/examples/gpt2_title_generation/train_multi_gpu.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import sys
sys.path.append("/data/liuguang/Sailing")
diff --git a/examples/roberta_faq/1_construct_data.py b/examples/roberta_faq/1_construct_data.py
index acebc2b2..23e288e0 100644
--- a/examples/roberta_faq/1_construct_data.py
+++ b/examples/roberta_faq/1_construct_data.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# construct data
# data from https://github.com/murufeng/ChineseNlpCorpus
import torch
diff --git a/examples/roberta_faq/2_test_bert_faq.py b/examples/roberta_faq/2_test_bert_faq.py
index d5de394e..2fca801f 100644
--- a/examples/roberta_faq/2_test_bert_faq.py
+++ b/examples/roberta_faq/2_test_bert_faq.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import faiss
import torch
from flagai.auto_model.auto_loader import AutoLoader
diff --git a/examples/roberta_ner/generate.py b/examples/roberta_ner/generate.py
index 4e436279..d94ace1d 100755
--- a/examples/roberta_ner/generate.py
+++ b/examples/roberta_ner/generate.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/roberta_ner/generate_crf.py b/examples/roberta_ner/generate_crf.py
index e65af51b..ec690263 100755
--- a/examples/roberta_ner/generate_crf.py
+++ b/examples/roberta_ner/generate_crf.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/roberta_ner/generate_global_pointer.py b/examples/roberta_ner/generate_global_pointer.py
index 54c57530..84df2ba9 100755
--- a/examples/roberta_ner/generate_global_pointer.py
+++ b/examples/roberta_ner/generate_global_pointer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/roberta_ner/train.py b/examples/roberta_ner/train.py
index bafec717..25579629 100644
--- a/examples/roberta_ner/train.py
+++ b/examples/roberta_ner/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import os
from tqdm import tqdm
diff --git a/examples/roberta_ner/train_crf.py b/examples/roberta_ner/train_crf.py
index ff9c9343..64ccffc5 100644
--- a/examples/roberta_ner/train_crf.py
+++ b/examples/roberta_ner/train_crf.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from tqdm import tqdm
import torch
from torch.utils.data import Dataset
diff --git a/examples/roberta_ner/train_global_pointer.py b/examples/roberta_ner/train_global_pointer.py
index d164726f..ffba8d8f 100644
--- a/examples/roberta_ner/train_global_pointer.py
+++ b/examples/roberta_ner/train_global_pointer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from tqdm import tqdm
import torch
from torch.utils.data import Dataset
diff --git a/examples/roberta_semantic_matching/generate.py b/examples/roberta_semantic_matching/generate.py
index 6548d55e..70cb8375 100755
--- a/examples/roberta_semantic_matching/generate.py
+++ b/examples/roberta_semantic_matching/generate.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/roberta_semantic_matching/train.py b/examples/roberta_semantic_matching/train.py
index 40d896bb..6636169a 100644
--- a/examples/roberta_semantic_matching/train.py
+++ b/examples/roberta_semantic_matching/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import os
import torch
from torch.utils.data import Dataset
diff --git a/examples/roberta_title_generation/generate.py b/examples/roberta_title_generation/generate.py
index 0564a515..51dd9123 100755
--- a/examples/roberta_title_generation/generate.py
+++ b/examples/roberta_title_generation/generate.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/roberta_title_generation/train.py b/examples/roberta_title_generation/train.py
index 65dcdeb6..73505de1 100644
--- a/examples/roberta_title_generation/train.py
+++ b/examples/roberta_title_generation/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import os
import torch
from torch.utils.data import Dataset
diff --git a/examples/roberta_title_generation/train_multi_gpu.py b/examples/roberta_title_generation/train_multi_gpu.py
index 5923e9c2..c344b8e6 100644
--- a/examples/roberta_title_generation/train_multi_gpu.py
+++ b/examples/roberta_title_generation/train_multi_gpu.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import sys
sys.path.append("/data/liuguang/Sailing")
diff --git a/examples/t5_flagai_11b/deepspeed.json b/examples/t5_flagai_11b/deepspeed.json
new file mode 100644
index 00000000..a0dfee13
--- /dev/null
+++ b/examples/t5_flagai_11b/deepspeed.json
@@ -0,0 +1,40 @@
+{
+ "train_micro_batch_size_per_gpu": 2,
+ "gradient_accumulation_steps": 1,
+ "steps_per_print": 100,
+ "gradient_clipping": 1.0,
+ "zero_optimization": {
+ "stage": 3,
+ "contiguous_gradients": false,
+ "overlap_comm": true,
+ "reduce_scatter": true,
+ "reduce_bucket_size": 5e7,
+ "allgather_bucket_size": 5e7,
+ "cpu_offload": true
+ },
+ "zero_allow_untested_optimizer": true,
+ "fp16": {
+ "enabled": true,
+ "loss_scale": 0,
+ "loss_scale_window": 1000,
+ "hysteresis": 2,
+ "min_loss_scale": 1
+ },
+ "optimizer": {
+ "type": "Adam",
+ "params": {
+ "lr": 0.0004,
+ "weight_decay": 0.01,
+ "betas": [
+ 0.9,
+ 0.98
+ ],
+ "eps": 1e-6
+ }
+ },
+ "activation_checkpointing": {
+ "partition_activations": false,
+ "contiguous_memory_optimization": false
+ },
+ "wall_clock_breakdown": false
+ }
\ No newline at end of file
diff --git a/examples/t5_flagai_11b/train_title_with_flagai_t5_11b.py b/examples/t5_flagai_11b/train_title_with_flagai_t5_11b.py
index 197a3149..7bfc5cb7 100644
--- a/examples/t5_flagai_11b/train_title_with_flagai_t5_11b.py
+++ b/examples/t5_flagai_11b/train_title_with_flagai_t5_11b.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import sys
sys.path.append('/mnt/liuguang/FlagAI')
@@ -74,7 +77,7 @@ def read_file():
model = T5ForConditionalGeneration.from_pretrain(download_path='/mnt',
model_name='t5-11b')
-
+model.gradient_checkpointing = True
print("loading model & tokenizer is done!")
maxlen = 1024
diff --git a/examples/t5_huggingface/train_t5_11b.py b/examples/t5_huggingface/train_t5_11b.py
index 59e0479a..090d0397 100644
--- a/examples/t5_huggingface/train_t5_11b.py
+++ b/examples/t5_huggingface/train_t5_11b.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.trainer import Trainer
from transformers import T5ForConditionalGeneration, T5Tokenizer
from torch.utils.data import Dataset
diff --git a/examples/t5_title_generation/generate.py b/examples/t5_title_generation/generate.py
index 35790b8e..ece29a16 100644
--- a/examples/t5_title_generation/generate.py
+++ b/examples/t5_title_generation/generate.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/examples/t5_title_generation/train.py b/examples/t5_title_generation/train.py
index b4c5e628..bdb9509a 100644
--- a/examples/t5_title_generation/train.py
+++ b/examples/t5_title_generation/train.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import os
import torch
from torch.utils.data import Dataset
diff --git a/flagai/auto_model/auto_loader.py b/flagai/auto_model/auto_loader.py
index 089c1199..3d90b61e 100644
--- a/flagai/auto_model/auto_loader.py
+++ b/flagai/auto_model/auto_loader.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import importlib
import os
from flagai.model.file_utils import _get_model_id, _get_vocab_path
diff --git a/flagai/data/collate_utils.py b/flagai/data/collate_utils.py
index 2cad78c4..4530d04b 100644
--- a/flagai/data/collate_utils.py
+++ b/flagai/data/collate_utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import numpy as np
import torch
diff --git a/flagai/data/dataset/block/blocklm_utils.py b/flagai/data/dataset/block/blocklm_utils.py
index 7c806779..0547a7a8 100644
--- a/flagai/data/dataset/block/blocklm_utils.py
+++ b/flagai/data/dataset/block/blocklm_utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
import torch.utils.data
# import mpu
diff --git a/flagai/data/dataset/block/corpora.py b/flagai/data/dataset/block/corpora.py
index c656b049..117b33a2 100644
--- a/flagai/data/dataset/block/corpora.py
+++ b/flagai/data/dataset/block/corpora.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import json
import tqdm
import os
diff --git a/flagai/data/dataset/block/data_utils.py b/flagai/data/dataset/block/data_utils.py
index ea686c8c..6f98cbb6 100644
--- a/flagai/data/dataset/block/data_utils.py
+++ b/flagai/data/dataset/block/data_utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/data/dataset/block/dataset.py b/flagai/data/dataset/block/dataset.py
index a1641215..47459ae7 100644
--- a/flagai/data/dataset/block/dataset.py
+++ b/flagai/data/dataset/block/dataset.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from pkgutil import get_loader
from torch.utils.data import Dataset
from itertools import accumulate
diff --git a/flagai/data/dataset/block/lazy_loader.py b/flagai/data/dataset/block/lazy_loader.py
index 861e7247..521bac7d 100644
--- a/flagai/data/dataset/block/lazy_loader.py
+++ b/flagai/data/dataset/block/lazy_loader.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/data/dataset/data_collator/collate_fn.py b/flagai/data/dataset/data_collator/collate_fn.py
index db90b31e..525fa6de 100644
--- a/flagai/data/dataset/data_collator/collate_fn.py
+++ b/flagai/data/dataset/data_collator/collate_fn.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
import torch.utils.data
import random
diff --git a/flagai/data/dataset/data_utils.py b/flagai/data/dataset/data_utils.py
index d165dfc6..5b56656a 100644
--- a/flagai/data/dataset/data_utils.py
+++ b/flagai/data/dataset/data_utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/data/dataset/language_model/dataset.py b/flagai/data/dataset/language_model/dataset.py
index 28e1a7fa..2f0e36d2 100644
--- a/flagai/data/dataset/language_model/dataset.py
+++ b/flagai/data/dataset/language_model/dataset.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
import json
import math
diff --git a/flagai/data/dataset/seq2seq/dataset.py b/flagai/data/dataset/seq2seq/dataset.py
index 66eb3370..88634b79 100644
--- a/flagai/data/dataset/seq2seq/dataset.py
+++ b/flagai/data/dataset/seq2seq/dataset.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import os
import json
import random
diff --git a/flagai/data/dataset/superglue/control.py b/flagai/data/dataset/superglue/control.py
index 6cddeecd..3cc1fbb0 100644
--- a/flagai/data/dataset/superglue/control.py
+++ b/flagai/data/dataset/superglue/control.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.data.dataset.superglue.processor import *
from flagai.data.dataset.superglue.pvp import *
diff --git a/flagai/data/dataset/superglue/dataset.py b/flagai/data/dataset/superglue/dataset.py
index 18074883..92485775 100644
--- a/flagai/data/dataset/superglue/dataset.py
+++ b/flagai/data/dataset/superglue/dataset.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from torch.utils.data import Dataset
from .control import SuperGlueProcessor
from collections import Counter
diff --git a/flagai/data/dataset/superglue/processor.py b/flagai/data/dataset/superglue/processor.py
index 13b6cc28..d4b6fcfc 100644
--- a/flagai/data/dataset/superglue/processor.py
+++ b/flagai/data/dataset/superglue/processor.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import zipfile
from abc import ABC, abstractmethod
import os
diff --git a/flagai/data/dataset/superglue/properties.py b/flagai/data/dataset/superglue/properties.py
index b574d7bb..d9d610a1 100644
--- a/flagai/data/dataset/superglue/properties.py
+++ b/flagai/data/dataset/superglue/properties.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
urls = {
'axb':
'https://dl.fbaipublicfiles.com/glue/superglue/data/v2/AX-b.zip',
diff --git a/flagai/data/dataset/superglue/pvp.py b/flagai/data/dataset/superglue/pvp.py
index a5b0a04c..9cd21714 100644
--- a/flagai/data/dataset/superglue/pvp.py
+++ b/flagai/data/dataset/superglue/pvp.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
diff --git a/flagai/data/file_utils.py b/flagai/data/file_utils.py
index d0d9b317..4afe05e4 100644
--- a/flagai/data/file_utils.py
+++ b/flagai/data/file_utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# This file is provided as is from:
# https://github.com/huggingface/pytorch-pretrained-BERT
# Please refer to their repository for copyright.
diff --git a/flagai/data/tokenizer/bert/bert_tokenizer.py b/flagai/data/tokenizer/bert/bert_tokenizer.py
index f9f795f3..5c0578b3 100644
--- a/flagai/data/tokenizer/bert/bert_tokenizer.py
+++ b/flagai/data/tokenizer/bert/bert_tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/data/tokenizer/bert/wordpiece.py b/flagai/data/tokenizer/bert/wordpiece.py
index 7f95cad5..cdd9f8f5 100644
--- a/flagai/data/tokenizer/bert/wordpiece.py
+++ b/flagai/data/tokenizer/bert/wordpiece.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from __future__ import absolute_import, division, print_function, unicode_literals
from typing import List, Union, Optional
import collections
diff --git a/flagai/data/tokenizer/glm_10b_en/glm_10b_en_bpe_tokenizer.py b/flagai/data/tokenizer/glm_10b_en/glm_10b_en_bpe_tokenizer.py
index b6ad2190..8bf16ef4 100644
--- a/flagai/data/tokenizer/glm_10b_en/glm_10b_en_bpe_tokenizer.py
+++ b/flagai/data/tokenizer/glm_10b_en/glm_10b_en_bpe_tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/data/tokenizer/glm_10b_en/glm_10b_en_tokenizer.py b/flagai/data/tokenizer/glm_10b_en/glm_10b_en_tokenizer.py
index 1e354924..b039b2ac 100644
--- a/flagai/data/tokenizer/glm_10b_en/glm_10b_en_tokenizer.py
+++ b/flagai/data/tokenizer/glm_10b_en/glm_10b_en_tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright 2018 The Open AI Team Authors and The HuggingFace Inc. team.
#
diff --git a/flagai/data/tokenizer/glm_large_ch/glm_large_ch.py b/flagai/data/tokenizer/glm_large_ch/glm_large_ch.py
index 7853c688..23c69e81 100644
--- a/flagai/data/tokenizer/glm_large_ch/glm_large_ch.py
+++ b/flagai/data/tokenizer/glm_large_ch/glm_large_ch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
"""
from https://github.com/openai/gpt-2/, changed for chinese
"""
diff --git a/flagai/data/tokenizer/glm_large_ch/glm_large_ch_tokenizer.py b/flagai/data/tokenizer/glm_large_ch/glm_large_ch_tokenizer.py
index 998fd20d..23e27cb9 100644
--- a/flagai/data/tokenizer/glm_large_ch/glm_large_ch_tokenizer.py
+++ b/flagai/data/tokenizer/glm_large_ch/glm_large_ch_tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/data/tokenizer/glm_large_en/glm_large_en_tokenizer.py b/flagai/data/tokenizer/glm_large_en/glm_large_en_tokenizer.py
index d1dd28df..dbc295fa 100644
--- a/flagai/data/tokenizer/glm_large_en/glm_large_en_tokenizer.py
+++ b/flagai/data/tokenizer/glm_large_en/glm_large_en_tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/data/tokenizer/glm_large_en/wordpiece.py b/flagai/data/tokenizer/glm_large_en/wordpiece.py
index 49a021d2..2e54be28 100644
--- a/flagai/data/tokenizer/glm_large_en/wordpiece.py
+++ b/flagai/data/tokenizer/glm_large_en/wordpiece.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
#
diff --git a/flagai/data/tokenizer/roberta/roberta_tokenizer.py b/flagai/data/tokenizer/roberta/roberta_tokenizer.py
index ad118c6f..155edf7e 100644
--- a/flagai/data/tokenizer/roberta/roberta_tokenizer.py
+++ b/flagai/data/tokenizer/roberta/roberta_tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/data/tokenizer/t5/t5_pegasus_tokenizer.py b/flagai/data/tokenizer/t5/t5_pegasus_tokenizer.py
index 87cc7020..9692743b 100644
--- a/flagai/data/tokenizer/t5/t5_pegasus_tokenizer.py
+++ b/flagai/data/tokenizer/t5/t5_pegasus_tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.data.tokenizer.bert.wordpiece import BertTokenizer as Tokenizer
from transformers import BertTokenizer
import jieba
diff --git a/flagai/data/tokenizer/t5/t5_tokenizer.py b/flagai/data/tokenizer/t5/t5_tokenizer.py
index 97ac7d4c..f8335364 100644
--- a/flagai/data/tokenizer/t5/t5_tokenizer.py
+++ b/flagai/data/tokenizer/t5/t5_tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/data/tokenizer/tokenizer.py b/flagai/data/tokenizer/tokenizer.py
index 8bec4e8c..557f37b4 100644
--- a/flagai/data/tokenizer/tokenizer.py
+++ b/flagai/data/tokenizer/tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/fp16/fp16.py b/flagai/fp16/fp16.py
index 435e8012..87e262c2 100644
--- a/flagai/fp16/fp16.py
+++ b/flagai/fp16/fp16.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/fp16/fp16util.py b/flagai/fp16/fp16util.py
index 4a4b1837..bb02f5c4 100644
--- a/flagai/fp16/fp16util.py
+++ b/flagai/fp16/fp16util.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/fp16/loss_scaler.py b/flagai/fp16/loss_scaler.py
index 1c310dcb..93f7cd85 100644
--- a/flagai/fp16/loss_scaler.py
+++ b/flagai/fp16/loss_scaler.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/launch.py b/flagai/launch.py
index 4212b1b7..3fe1d989 100644
--- a/flagai/launch.py
+++ b/flagai/launch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright 2020 The Microsoft DeepSpeed Team
"""
sailing runner is the main front-end to launching multi-worker
diff --git a/flagai/logger.py b/flagai/logger.py
index 61ea1045..42558f9d 100644
--- a/flagai/logger.py
+++ b/flagai/logger.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import logging
import sys
import os
diff --git a/flagai/metrics.py b/flagai/metrics.py
index 2e51d334..d5a27f3c 100644
--- a/flagai/metrics.py
+++ b/flagai/metrics.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
import re
from sklearn.metrics import f1_score
diff --git a/flagai/model/base_model.py b/flagai/model/base_model.py
index 56fe3ea7..345e3cd3 100644
--- a/flagai/model/base_model.py
+++ b/flagai/model/base_model.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from sklearn.linear_model import HuberRegressor
from torch.nn import Module
import torch
diff --git a/flagai/model/bert_model.py b/flagai/model/bert_model.py
index 3ab2974c..075f6004 100644
--- a/flagai/model/bert_model.py
+++ b/flagai/model/bert_model.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/model/blocks/bert_block.py b/flagai/model/blocks/bert_block.py
index 6ce58c9e..0fc0721d 100644
--- a/flagai/model/blocks/bert_block.py
+++ b/flagai/model/blocks/bert_block.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/model/blocks/glm_block.py b/flagai/model/blocks/glm_block.py
index b66f2adc..c2458606 100644
--- a/flagai/model/blocks/glm_block.py
+++ b/flagai/model/blocks/glm_block.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/model/blocks/gpt2_block.py b/flagai/model/blocks/gpt2_block.py
index ca8c4041..62925a08 100644
--- a/flagai/model/blocks/gpt2_block.py
+++ b/flagai/model/blocks/gpt2_block.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.model.layers.attentions import GPT2Attention
from flagai.model.layers.feedforward import GPT2MLP
from torch import nn
diff --git a/flagai/model/blocks/t5_block.py b/flagai/model/blocks/t5_block.py
index 8569d061..a9d89458 100644
--- a/flagai/model/blocks/t5_block.py
+++ b/flagai/model/blocks/t5_block.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from torch import nn
diff --git a/flagai/model/config.py b/flagai/model/config.py
index 3176a243..e61f7f0a 100644
--- a/flagai/model/config.py
+++ b/flagai/model/config.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
""" enc_dec model configuration """
import json
diff --git a/flagai/model/file_utils.py b/flagai/model/file_utils.py
index 4b4bff0d..df31b66a 100644
--- a/flagai/model/file_utils.py
+++ b/flagai/model/file_utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from time import sleep
import requests
diff --git a/flagai/model/glm_model.py b/flagai/model/glm_model.py
index 4e2749e2..f66d87fe 100644
--- a/flagai/model/glm_model.py
+++ b/flagai/model/glm_model.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/model/gpt2_model.py b/flagai/model/gpt2_model.py
index 3bb3bd83..ce66b2a5 100644
--- a/flagai/model/gpt2_model.py
+++ b/flagai/model/gpt2_model.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
import torch.nn as nn
import os
diff --git a/flagai/model/layers/activations.py b/flagai/model/layers/activations.py
index 7bc0fd08..308ede0a 100644
--- a/flagai/model/layers/activations.py
+++ b/flagai/model/layers/activations.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/flagai/model/layers/attentions.py b/flagai/model/layers/attentions.py
index ba0ea828..2b24af30 100644
--- a/flagai/model/layers/attentions.py
+++ b/flagai/model/layers/attentions.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/model/layers/crf.py b/flagai/model/layers/crf.py
index 412c43a9..9f337f51 100644
--- a/flagai/model/layers/crf.py
+++ b/flagai/model/layers/crf.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
import torch.nn as nn
import torch.nn.functional as F
diff --git a/flagai/model/layers/embeddings.py b/flagai/model/layers/embeddings.py
index 372ad88b..4d97b326 100644
--- a/flagai/model/layers/embeddings.py
+++ b/flagai/model/layers/embeddings.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/model/layers/feedforward.py b/flagai/model/layers/feedforward.py
index 1bae1f50..eaac43a5 100644
--- a/flagai/model/layers/feedforward.py
+++ b/flagai/model/layers/feedforward.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# feedforward
import os
import torch
diff --git a/flagai/model/layers/global_pointer.py b/flagai/model/layers/global_pointer.py
index a42d0ed3..5433a271 100644
--- a/flagai/model/layers/global_pointer.py
+++ b/flagai/model/layers/global_pointer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
import torch.nn as nn
diff --git a/flagai/model/layers/layer_norm.py b/flagai/model/layers/layer_norm.py
index 413f6550..1a855f3b 100644
--- a/flagai/model/layers/layer_norm.py
+++ b/flagai/model/layers/layer_norm.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# layer norm
import torch
diff --git a/flagai/model/predictor/predictor.py b/flagai/model/predictor/predictor.py
index 8f4d2151..487665b7 100644
--- a/flagai/model/predictor/predictor.py
+++ b/flagai/model/predictor/predictor.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import numpy as np
import torch
import torch.nn.functional as F
diff --git a/flagai/model/predictor/utils.py b/flagai/model/predictor/utils.py
index 6f9cddae..ab62def4 100644
--- a/flagai/model/predictor/utils.py
+++ b/flagai/model/predictor/utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import json
import os
from typing import List
diff --git a/flagai/model/prompt.py b/flagai/model/prompt.py
index 893432f3..5044a2aa 100644
--- a/flagai/model/prompt.py
+++ b/flagai/model/prompt.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import random
import torch
diff --git a/flagai/model/t5_model.py b/flagai/model/t5_model.py
index f6f9e237..dac5f934 100644
--- a/flagai/model/t5_model.py
+++ b/flagai/model/t5_model.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright 2018 Mesh TensorFlow authors, T5 Authors and HuggingFace Inc. team.
#
diff --git a/flagai/model/utils.py b/flagai/model/utils.py
index 3b9c2e0d..b37103c0 100644
--- a/flagai/model/utils.py
+++ b/flagai/model/utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import math
import torch
diff --git a/flagai/mp_tools.py b/flagai/mp_tools.py
index 742f4af9..624da69f 100644
--- a/flagai/mp_tools.py
+++ b/flagai/mp_tools.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import sys
import os
import torch
diff --git a/flagai/mp_utils.py b/flagai/mp_utils.py
index c9a0e0fc..ba78ef8b 100644
--- a/flagai/mp_utils.py
+++ b/flagai/mp_utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import sys
import os
import torch
diff --git a/flagai/mpu/cross_entropy.py b/flagai/mpu/cross_entropy.py
index 567d82e9..2b8ba008 100644
--- a/flagai/mpu/cross_entropy.py
+++ b/flagai/mpu/cross_entropy.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/mpu/data.py b/flagai/mpu/data.py
index f8d510e0..58a802e3 100644
--- a/flagai/mpu/data.py
+++ b/flagai/mpu/data.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/mpu/func_utils.py b/flagai/mpu/func_utils.py
index f9eba243..18cfbd65 100644
--- a/flagai/mpu/func_utils.py
+++ b/flagai/mpu/func_utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/mpu/grads.py b/flagai/mpu/grads.py
index 63b9620e..16cec15a 100644
--- a/flagai/mpu/grads.py
+++ b/flagai/mpu/grads.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/mpu/initialize.py b/flagai/mpu/initialize.py
index ea36988c..1b9afa61 100644
--- a/flagai/mpu/initialize.py
+++ b/flagai/mpu/initialize.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/mpu/local_attention_function.py b/flagai/mpu/local_attention_function.py
index 50f9d6e7..5d8f22ee 100644
--- a/flagai/mpu/local_attention_function.py
+++ b/flagai/mpu/local_attention_function.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from torch import nn
import torch.nn.functional as F
diff --git a/flagai/mpu/mappings.py b/flagai/mpu/mappings.py
index 1f5fa9d9..5c76be09 100644
--- a/flagai/mpu/mappings.py
+++ b/flagai/mpu/mappings.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/mpu/random.py b/flagai/mpu/random.py
index 82aed5b0..653e6b61 100644
--- a/flagai/mpu/random.py
+++ b/flagai/mpu/random.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
#Modified by Samyam Rajbhandari
#Used to partition the activations stored for backward propagation
diff --git a/flagai/mpu/utils.py b/flagai/mpu/utils.py
index 88e9ae41..9aec39f5 100644
--- a/flagai/mpu/utils.py
+++ b/flagai/mpu/utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/optimizers.py b/flagai/optimizers.py
index 4b33427b..166e1f65 100644
--- a/flagai/optimizers.py
+++ b/flagai/optimizers.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
try:
from apex.optimizers import FusedAdam as Adam
diff --git a/flagai/schedulers.py b/flagai/schedulers.py
index 5c717c19..0267bc8a 100644
--- a/flagai/schedulers.py
+++ b/flagai/schedulers.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai/test_utils.py b/flagai/test_utils.py
index f77343f7..397dd9f3 100644
--- a/flagai/test_utils.py
+++ b/flagai/test_utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
def build_input_from_ids(text_a_ids=None,
text_b_ids=None,
answer_ids=None,
diff --git a/flagai/trainer.py b/flagai/trainer.py
index 3305661a..c32d8e06 100644
--- a/flagai/trainer.py
+++ b/flagai/trainer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# Arguments for training
try:
import deepspeed.utils
diff --git a/flagai/utils.py b/flagai/utils.py
index 2af09f3f..be2c39c5 100644
--- a/flagai/utils.py
+++ b/flagai/utils.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
diff --git a/flagai_wechat.png b/flagai_wechat.png
new file mode 100644
index 00000000..387bded2
Binary files /dev/null and b/flagai_wechat.png differ
diff --git a/logo.png b/logo.png
index 0136a04e..501e744d 100644
Binary files a/logo.png and b/logo.png differ
diff --git a/quickstart/glm_blank_filling_QA_ch.py b/quickstart/glm_blank_filling_QA_ch.py
index d3fe4f83..13f183ab 100644
--- a/quickstart/glm_blank_filling_QA_ch.py
+++ b/quickstart/glm_blank_filling_QA_ch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
# coding=utf-8
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
@@ -29,11 +32,12 @@
# Random seeds for reproducibility.
# Model,
model = GLMModel.from_pretrain(model_name='GLM-large-ch', download_path="./state_dict/")
- tokenizer = GLMLargeChTokenizer(vocab_path='./state_dict/GLM-large-ch/cog-pretrain.model',
+ #tokenizer = GLMLargeChTokenizer(vocab_path='./state_dict/GLM-large-ch/cog-pretrain.model',
+ tokenizer = GLMLargeChTokenizer(
add_block_symbols=True,
add_task_mask=True,
add_decoder_mask=False,
- fix_command_token=False)
+ fix_command_token=True)
model.cuda(torch.cuda.current_device())
diff --git a/quickstart/glm_title_ch.py b/quickstart/glm_title_ch.py
index 7c260ee0..882d68bc 100644
--- a/quickstart/glm_title_ch.py
+++ b/quickstart/glm_title_ch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/quickstart/ner_ch.py b/quickstart/ner_ch.py
index 0d5923fa..04ede832 100644
--- a/quickstart/ner_ch.py
+++ b/quickstart/ner_ch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/quickstart/title_ch.py b/quickstart/title_ch.py
index f0c41f40..4a9531be 100644
--- a/quickstart/title_ch.py
+++ b/quickstart/title_ch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/quickstart/title_en.py b/quickstart/title_en.py
index 6f0cbf9b..4b59b1a8 100644
--- a/quickstart/title_en.py
+++ b/quickstart/title_en.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/quickstart/writing_ch.py b/quickstart/writing_ch.py
index ab8331a0..77f1a0be 100644
--- a/quickstart/writing_ch.py
+++ b/quickstart/writing_ch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
diff --git a/setup.py b/setup.py
index 0317767e..32ad719f 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from setuptools import find_packages, setup
with open("requirements.txt") as f:
@@ -5,11 +8,11 @@
setup(
name="flagai",
- version="v1.0.0-beta6",
+ version="v1.0.0-beta7",
description="FlagAI aims to help researchers and developers to freely train and test large-scale models for NLP tasks.",
long_description=open("README.md", encoding="utf-8").read(),
long_description_content_type="text/markdown",
- author="BAAI Open",
+ author="BAAI-Open",
author_email="liuguang@baai.ac.cn",
url="https://github.com/BAAI-Open/FlagAI",
packages=find_packages(exclude="tests"), # same as name
diff --git a/test.py b/test.py
index a4373d7d..d03d28be 100644
--- a/test.py
+++ b/test.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import unittest
test_dir = './tests'
diff --git a/tests/bak_test_autoloader.py b/tests/bak_test_autoloader.py
index 3dcfeeaa..8b7221d6 100644
--- a/tests/bak_test_autoloader.py
+++ b/tests/bak_test_autoloader.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.auto_model.auto_loader import AutoLoader
import unittest
diff --git a/tests/bak_test_glm_seq2seq.py b/tests/bak_test_glm_seq2seq.py
index a4b7a5ce..998cd9f0 100644
--- a/tests/bak_test_glm_seq2seq.py
+++ b/tests/bak_test_glm_seq2seq.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.trainer import Trainer
from flagai.model.glm_model import GLMForSeq2Seq
from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer
diff --git a/tests/bak_test_glm_superglue.py b/tests/bak_test_glm_superglue.py
index a6db722a..f026f5e2 100644
--- a/tests/bak_test_glm_superglue.py
+++ b/tests/bak_test_glm_superglue.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.trainer import Trainer
from flagai.model.glm_model import GLMModel, GLMForSequenceClassification, GLMForSingleTokenCloze, GLMForMultiTokenCloze, GLMForMultiTokenClozeFast
from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer
diff --git a/tests/test_bert.py b/tests/test_bert.py
index 25f27cd8..fc3386b3 100644
--- a/tests/test_bert.py
+++ b/tests/test_bert.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
import torch
diff --git a/tests/test_glm_large_ch.py b/tests/test_glm_large_ch.py
index da0a99f3..b8fcabfa 100644
--- a/tests/test_glm_large_ch.py
+++ b/tests/test_glm_large_ch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.model.predictor.predictor import Predictor
import torch
from flagai.model.glm_model import GLMForSeq2Seq
diff --git a/tests/test_gpt2_ch.py b/tests/test_gpt2_ch.py
index 8e71135a..cd67f5c1 100644
--- a/tests/test_gpt2_ch.py
+++ b/tests/test_gpt2_ch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.model.gpt2_model import GPT2Model
from flagai.data.tokenizer.bert.bert_tokenizer import BertTokenizer
import torch
diff --git a/tests/test_t5_ch.py b/tests/test_t5_ch.py
index 3dfc488e..6ae84bc1 100644
--- a/tests/test_t5_ch.py
+++ b/tests/test_t5_ch.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
from flagai.model.t5_model import T5Model
from flagai.data.tokenizer.t5.t5_pegasus_tokenizer import T5PegasusTokenizer
from flagai.model.predictor.predictor import Predictor
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
index 2162a156..5900edcd 100644
--- a/tests/test_tokenizer.py
+++ b/tests/test_tokenizer.py
@@ -1,3 +1,6 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
import unittest
from flagai.data.tokenizer import GLMLargeChTokenizer
from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer