diff --git a/LICENSE b/LICENSE index 7a4a3ea2..0f76b3f8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,4 @@ +Copyright © 2022 BAAI. All rights reserved. Apache License Version 2.0, January 2004 @@ -175,28 +176,3 @@ of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md index c3172981..8c549251 100644 --- a/README.md +++ b/README.md @@ -152,20 +152,13 @@ Thanks for your interest in contributing! There are many ways to get involved; start with our [contributor guidelines](CONTRIBUTING.md) and then check these [open issues](https://github.com/BAAI-WuDao/Sailing/issues) for specific tasks. +## Contact us +Scan wechat QR code -## [License](/LICENSE) -``` -Copyright [2022] [BAAI] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + -http://www.apache.org/licenses/LICENSE-2.0 +## [License](/LICENSE) +The majority of FlagAI is licensed under the [Apache 2.0 license](LICENSE), however portions of the project are available under separate license terms: -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -``` +* Megatron-LM is licensed under the [Megatron-LM license](https://github.com/NVIDIA/Megatron-LM/blob/main/LICENSE) +* GLM is licensed under the [MIT license](https://github.com/THUDM/GLM/blob/main/LICENSE) \ No newline at end of file diff --git a/README_zh.md b/README_zh.md index af4f1fdd..3e01ee80 100644 --- a/README_zh.md +++ b/README_zh.md @@ -213,20 +213,14 @@ for text_pair in test_data: # 贡献代码 感谢您对贡献的兴趣! 参与的方式有很多; 从我们的[贡献者指南](CONTRIBUTING.md) 开始,然后检查这些[未解决的问题](https://github.com/BAAI-WuDao/Sailing/issues)以执行特定任务。 +# 联系我们 +欢迎扫码加入FlagAI用户群 -# [许可证](/LICENSE) -``` -Copyright [2022] [BAAI] + -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +# [许可证](/LICENSE) +大部分的FlagAI项目是基于[Apache 2.0 license](LICENSE), 但是部分的代码是基于其他的协议: -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -``` +* Megatron-LM 是基于协议[Megatron-LM license](https://github.com/NVIDIA/Megatron-LM/blob/main/LICENSE) +* GLM 是基于协议[MIT license](https://github.com/THUDM/GLM/blob/main/LICENSE) diff --git a/examples/bert_title_generation_english/generate.py b/examples/bert_title_generation_english/generate.py index 9210c5fc..007a8431 100755 --- a/examples/bert_title_generation_english/generate.py +++ b/examples/bert_title_generation_english/generate.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/bert_title_generation_english/train.py b/examples/bert_title_generation_english/train.py index 555adb87..8bd40899 100644 --- a/examples/bert_title_generation_english/train.py +++ b/examples/bert_title_generation_english/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import os import torch from torch.utils.data import Dataset diff --git a/examples/glm_blank_filling/glm_generate_samples.py b/examples/glm_blank_filling/glm_generate_samples.py index 043928b6..3c0e17ef 100755 --- a/examples/glm_blank_filling/glm_generate_samples.py +++ b/examples/glm_blank_filling/glm_generate_samples.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/examples/glm_poetry_generation/generate.py b/examples/glm_poetry_generation/generate.py index 1c1e86b3..6bb6711d 100755 --- a/examples/glm_poetry_generation/generate.py +++ b/examples/glm_poetry_generation/generate.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/glm_poetry_generation/train.py b/examples/glm_poetry_generation/train.py index 101c387e..b6ca393b 100644 --- a/examples/glm_poetry_generation/train.py +++ b/examples/glm_poetry_generation/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import os import torch from torch.utils.data import Dataset diff --git a/examples/glm_pretrain/train.py b/examples/glm_pretrain/train.py index 4a92d099..541e6fb4 100644 --- a/examples/glm_pretrain/train.py +++ b/examples/glm_pretrain/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.data.tokenizer import GLMLargeChTokenizer from flagai.model.glm_model import GLMForSeq2Seq from flagai.trainer import Trainer diff --git a/examples/glm_seq2seq/train.py b/examples/glm_seq2seq/train.py index dcd8f06e..8cc14c3a 100644 --- a/examples/glm_seq2seq/train.py +++ b/examples/glm_seq2seq/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.trainer import Trainer from flagai.model.glm_model import GLMForSeq2Seq from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer diff --git a/examples/glm_superglue/train.py b/examples/glm_superglue/train.py index d337d432..e4fc9bd1 100644 --- a/examples/glm_superglue/train.py +++ b/examples/glm_superglue/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.trainer import Trainer from flagai.model.glm_model import GLMForSingleTokenCloze, GLMForMultiTokenCloze, GLMForMultiTokenClozeFast from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer diff --git a/examples/glm_superglue/train_10b_clue.py b/examples/glm_superglue/train_10b_clue.py index 0baa19b4..70858740 100644 --- a/examples/glm_superglue/train_10b_clue.py +++ b/examples/glm_superglue/train_10b_clue.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.trainer import Trainer from flagai.model.glm_model import GLMForSingleTokenCloze from flagai.data.tokenizer import GLMLargeChTokenizer diff --git a/examples/glm_superglue/train_10b_superglue.py b/examples/glm_superglue/train_10b_superglue.py index 56efdbc1..b98847ef 100644 --- a/examples/glm_superglue/train_10b_superglue.py +++ b/examples/glm_superglue/train_10b_superglue.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.trainer import Trainer from flagai.model.glm_model import GLMForSingleTokenCloze from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer diff --git a/examples/glm_superglue/train_prefix.py b/examples/glm_superglue/train_prefix.py index dd374a89..9358c28a 100644 --- a/examples/glm_superglue/train_prefix.py +++ b/examples/glm_superglue/train_prefix.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.trainer import Trainer from flagai.model.glm_model import GLMForSingleTokenCloze, GLMForMultiTokenCloze, GLMForMultiTokenClozeFast, GLMForSequenceClassification from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer diff --git a/examples/glm_title_generation/generate.py b/examples/glm_title_generation/generate.py index a16044cc..a143c8b6 100644 --- a/examples/glm_title_generation/generate.py +++ b/examples/glm_title_generation/generate.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/glm_title_generation/train.py b/examples/glm_title_generation/train.py index 35823d2c..cbd47b60 100644 --- a/examples/glm_title_generation/train.py +++ b/examples/glm_title_generation/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import os import numpy as np import torch diff --git a/examples/gpt2_text_writting/generate.py b/examples/gpt2_text_writting/generate.py index 544d43fd..cec5bc86 100644 --- a/examples/gpt2_text_writting/generate.py +++ b/examples/gpt2_text_writting/generate.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/gpt2_title_generation/generate.py b/examples/gpt2_title_generation/generate.py index 1dc8eb2b..488009f7 100755 --- a/examples/gpt2_title_generation/generate.py +++ b/examples/gpt2_title_generation/generate.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/gpt2_title_generation/train.py b/examples/gpt2_title_generation/train.py index 8e36fc7c..31d0efc8 100644 --- a/examples/gpt2_title_generation/train.py +++ b/examples/gpt2_title_generation/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import sys sys.path.append('/data/liuguang/Sailing') diff --git a/examples/gpt2_title_generation/train_multi_gpu.py b/examples/gpt2_title_generation/train_multi_gpu.py index fcffdea1..6f938171 100644 --- a/examples/gpt2_title_generation/train_multi_gpu.py +++ b/examples/gpt2_title_generation/train_multi_gpu.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import sys sys.path.append("/data/liuguang/Sailing") diff --git a/examples/roberta_faq/1_construct_data.py b/examples/roberta_faq/1_construct_data.py index acebc2b2..23e288e0 100644 --- a/examples/roberta_faq/1_construct_data.py +++ b/examples/roberta_faq/1_construct_data.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # construct data # data from https://github.com/murufeng/ChineseNlpCorpus import torch diff --git a/examples/roberta_faq/2_test_bert_faq.py b/examples/roberta_faq/2_test_bert_faq.py index d5de394e..2fca801f 100644 --- a/examples/roberta_faq/2_test_bert_faq.py +++ b/examples/roberta_faq/2_test_bert_faq.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import faiss import torch from flagai.auto_model.auto_loader import AutoLoader diff --git a/examples/roberta_ner/generate.py b/examples/roberta_ner/generate.py index 4e436279..d94ace1d 100755 --- a/examples/roberta_ner/generate.py +++ b/examples/roberta_ner/generate.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/roberta_ner/generate_crf.py b/examples/roberta_ner/generate_crf.py index e65af51b..ec690263 100755 --- a/examples/roberta_ner/generate_crf.py +++ b/examples/roberta_ner/generate_crf.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/roberta_ner/generate_global_pointer.py b/examples/roberta_ner/generate_global_pointer.py index 54c57530..84df2ba9 100755 --- a/examples/roberta_ner/generate_global_pointer.py +++ b/examples/roberta_ner/generate_global_pointer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/roberta_ner/train.py b/examples/roberta_ner/train.py index bafec717..25579629 100644 --- a/examples/roberta_ner/train.py +++ b/examples/roberta_ner/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import os from tqdm import tqdm diff --git a/examples/roberta_ner/train_crf.py b/examples/roberta_ner/train_crf.py index ff9c9343..64ccffc5 100644 --- a/examples/roberta_ner/train_crf.py +++ b/examples/roberta_ner/train_crf.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from tqdm import tqdm import torch from torch.utils.data import Dataset diff --git a/examples/roberta_ner/train_global_pointer.py b/examples/roberta_ner/train_global_pointer.py index d164726f..ffba8d8f 100644 --- a/examples/roberta_ner/train_global_pointer.py +++ b/examples/roberta_ner/train_global_pointer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from tqdm import tqdm import torch from torch.utils.data import Dataset diff --git a/examples/roberta_semantic_matching/generate.py b/examples/roberta_semantic_matching/generate.py index 6548d55e..70cb8375 100755 --- a/examples/roberta_semantic_matching/generate.py +++ b/examples/roberta_semantic_matching/generate.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/roberta_semantic_matching/train.py b/examples/roberta_semantic_matching/train.py index 40d896bb..6636169a 100644 --- a/examples/roberta_semantic_matching/train.py +++ b/examples/roberta_semantic_matching/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import os import torch from torch.utils.data import Dataset diff --git a/examples/roberta_title_generation/generate.py b/examples/roberta_title_generation/generate.py index 0564a515..51dd9123 100755 --- a/examples/roberta_title_generation/generate.py +++ b/examples/roberta_title_generation/generate.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/roberta_title_generation/train.py b/examples/roberta_title_generation/train.py index 65dcdeb6..73505de1 100644 --- a/examples/roberta_title_generation/train.py +++ b/examples/roberta_title_generation/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import os import torch from torch.utils.data import Dataset diff --git a/examples/roberta_title_generation/train_multi_gpu.py b/examples/roberta_title_generation/train_multi_gpu.py index 5923e9c2..c344b8e6 100644 --- a/examples/roberta_title_generation/train_multi_gpu.py +++ b/examples/roberta_title_generation/train_multi_gpu.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import sys sys.path.append("/data/liuguang/Sailing") diff --git a/examples/t5_flagai_11b/deepspeed.json b/examples/t5_flagai_11b/deepspeed.json new file mode 100644 index 00000000..a0dfee13 --- /dev/null +++ b/examples/t5_flagai_11b/deepspeed.json @@ -0,0 +1,40 @@ +{ + "train_micro_batch_size_per_gpu": 2, + "gradient_accumulation_steps": 1, + "steps_per_print": 100, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": 3, + "contiguous_gradients": false, + "overlap_comm": true, + "reduce_scatter": true, + "reduce_bucket_size": 5e7, + "allgather_bucket_size": 5e7, + "cpu_offload": true + }, + "zero_allow_untested_optimizer": true, + "fp16": { + "enabled": true, + "loss_scale": 0, + "loss_scale_window": 1000, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "optimizer": { + "type": "Adam", + "params": { + "lr": 0.0004, + "weight_decay": 0.01, + "betas": [ + 0.9, + 0.98 + ], + "eps": 1e-6 + } + }, + "activation_checkpointing": { + "partition_activations": false, + "contiguous_memory_optimization": false + }, + "wall_clock_breakdown": false + } \ No newline at end of file diff --git a/examples/t5_flagai_11b/train_title_with_flagai_t5_11b.py b/examples/t5_flagai_11b/train_title_with_flagai_t5_11b.py index 197a3149..7bfc5cb7 100644 --- a/examples/t5_flagai_11b/train_title_with_flagai_t5_11b.py +++ b/examples/t5_flagai_11b/train_title_with_flagai_t5_11b.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import sys sys.path.append('/mnt/liuguang/FlagAI') @@ -74,7 +77,7 @@ def read_file(): model = T5ForConditionalGeneration.from_pretrain(download_path='/mnt', model_name='t5-11b') - +model.gradient_checkpointing = True print("loading model & tokenizer is done!") maxlen = 1024 diff --git a/examples/t5_huggingface/train_t5_11b.py b/examples/t5_huggingface/train_t5_11b.py index 59e0479a..090d0397 100644 --- a/examples/t5_huggingface/train_t5_11b.py +++ b/examples/t5_huggingface/train_t5_11b.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.trainer import Trainer from transformers import T5ForConditionalGeneration, T5Tokenizer from torch.utils.data import Dataset diff --git a/examples/t5_title_generation/generate.py b/examples/t5_title_generation/generate.py index 35790b8e..ece29a16 100644 --- a/examples/t5_title_generation/generate.py +++ b/examples/t5_title_generation/generate.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/examples/t5_title_generation/train.py b/examples/t5_title_generation/train.py index b4c5e628..bdb9509a 100644 --- a/examples/t5_title_generation/train.py +++ b/examples/t5_title_generation/train.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import os import torch from torch.utils.data import Dataset diff --git a/flagai/auto_model/auto_loader.py b/flagai/auto_model/auto_loader.py index 089c1199..3d90b61e 100644 --- a/flagai/auto_model/auto_loader.py +++ b/flagai/auto_model/auto_loader.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import importlib import os from flagai.model.file_utils import _get_model_id, _get_vocab_path diff --git a/flagai/data/collate_utils.py b/flagai/data/collate_utils.py index 2cad78c4..4530d04b 100644 --- a/flagai/data/collate_utils.py +++ b/flagai/data/collate_utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import numpy as np import torch diff --git a/flagai/data/dataset/block/blocklm_utils.py b/flagai/data/dataset/block/blocklm_utils.py index 7c806779..0547a7a8 100644 --- a/flagai/data/dataset/block/blocklm_utils.py +++ b/flagai/data/dataset/block/blocklm_utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch import torch.utils.data # import mpu diff --git a/flagai/data/dataset/block/corpora.py b/flagai/data/dataset/block/corpora.py index c656b049..117b33a2 100644 --- a/flagai/data/dataset/block/corpora.py +++ b/flagai/data/dataset/block/corpora.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import json import tqdm import os diff --git a/flagai/data/dataset/block/data_utils.py b/flagai/data/dataset/block/data_utils.py index ea686c8c..6f98cbb6 100644 --- a/flagai/data/dataset/block/data_utils.py +++ b/flagai/data/dataset/block/data_utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/data/dataset/block/dataset.py b/flagai/data/dataset/block/dataset.py index a1641215..47459ae7 100644 --- a/flagai/data/dataset/block/dataset.py +++ b/flagai/data/dataset/block/dataset.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from pkgutil import get_loader from torch.utils.data import Dataset from itertools import accumulate diff --git a/flagai/data/dataset/block/lazy_loader.py b/flagai/data/dataset/block/lazy_loader.py index 861e7247..521bac7d 100644 --- a/flagai/data/dataset/block/lazy_loader.py +++ b/flagai/data/dataset/block/lazy_loader.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/data/dataset/data_collator/collate_fn.py b/flagai/data/dataset/data_collator/collate_fn.py index db90b31e..525fa6de 100644 --- a/flagai/data/dataset/data_collator/collate_fn.py +++ b/flagai/data/dataset/data_collator/collate_fn.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch import torch.utils.data import random diff --git a/flagai/data/dataset/data_utils.py b/flagai/data/dataset/data_utils.py index d165dfc6..5b56656a 100644 --- a/flagai/data/dataset/data_utils.py +++ b/flagai/data/dataset/data_utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/data/dataset/language_model/dataset.py b/flagai/data/dataset/language_model/dataset.py index 28e1a7fa..2f0e36d2 100644 --- a/flagai/data/dataset/language_model/dataset.py +++ b/flagai/data/dataset/language_model/dataset.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch import json import math diff --git a/flagai/data/dataset/seq2seq/dataset.py b/flagai/data/dataset/seq2seq/dataset.py index 66eb3370..88634b79 100644 --- a/flagai/data/dataset/seq2seq/dataset.py +++ b/flagai/data/dataset/seq2seq/dataset.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import os import json import random diff --git a/flagai/data/dataset/superglue/control.py b/flagai/data/dataset/superglue/control.py index 6cddeecd..3cc1fbb0 100644 --- a/flagai/data/dataset/superglue/control.py +++ b/flagai/data/dataset/superglue/control.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.data.dataset.superglue.processor import * from flagai.data.dataset.superglue.pvp import * diff --git a/flagai/data/dataset/superglue/dataset.py b/flagai/data/dataset/superglue/dataset.py index 18074883..92485775 100644 --- a/flagai/data/dataset/superglue/dataset.py +++ b/flagai/data/dataset/superglue/dataset.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from torch.utils.data import Dataset from .control import SuperGlueProcessor from collections import Counter diff --git a/flagai/data/dataset/superglue/processor.py b/flagai/data/dataset/superglue/processor.py index 13b6cc28..d4b6fcfc 100644 --- a/flagai/data/dataset/superglue/processor.py +++ b/flagai/data/dataset/superglue/processor.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import zipfile from abc import ABC, abstractmethod import os diff --git a/flagai/data/dataset/superglue/properties.py b/flagai/data/dataset/superglue/properties.py index b574d7bb..d9d610a1 100644 --- a/flagai/data/dataset/superglue/properties.py +++ b/flagai/data/dataset/superglue/properties.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") urls = { 'axb': 'https://dl.fbaipublicfiles.com/glue/superglue/data/v2/AX-b.zip', diff --git a/flagai/data/dataset/superglue/pvp.py b/flagai/data/dataset/superglue/pvp.py index a5b0a04c..9cd21714 100644 --- a/flagai/data/dataset/superglue/pvp.py +++ b/flagai/data/dataset/superglue/pvp.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/flagai/data/file_utils.py b/flagai/data/file_utils.py index d0d9b317..4afe05e4 100644 --- a/flagai/data/file_utils.py +++ b/flagai/data/file_utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # This file is provided as is from: # https://github.com/huggingface/pytorch-pretrained-BERT # Please refer to their repository for copyright. diff --git a/flagai/data/tokenizer/bert/bert_tokenizer.py b/flagai/data/tokenizer/bert/bert_tokenizer.py index f9f795f3..5c0578b3 100644 --- a/flagai/data/tokenizer/bert/bert_tokenizer.py +++ b/flagai/data/tokenizer/bert/bert_tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/data/tokenizer/bert/wordpiece.py b/flagai/data/tokenizer/bert/wordpiece.py index 7f95cad5..cdd9f8f5 100644 --- a/flagai/data/tokenizer/bert/wordpiece.py +++ b/flagai/data/tokenizer/bert/wordpiece.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from __future__ import absolute_import, division, print_function, unicode_literals from typing import List, Union, Optional import collections diff --git a/flagai/data/tokenizer/glm_10b_en/glm_10b_en_bpe_tokenizer.py b/flagai/data/tokenizer/glm_10b_en/glm_10b_en_bpe_tokenizer.py index b6ad2190..8bf16ef4 100644 --- a/flagai/data/tokenizer/glm_10b_en/glm_10b_en_bpe_tokenizer.py +++ b/flagai/data/tokenizer/glm_10b_en/glm_10b_en_bpe_tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/data/tokenizer/glm_10b_en/glm_10b_en_tokenizer.py b/flagai/data/tokenizer/glm_10b_en/glm_10b_en_tokenizer.py index 1e354924..b039b2ac 100644 --- a/flagai/data/tokenizer/glm_10b_en/glm_10b_en_tokenizer.py +++ b/flagai/data/tokenizer/glm_10b_en/glm_10b_en_tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright 2018 The Open AI Team Authors and The HuggingFace Inc. team. # diff --git a/flagai/data/tokenizer/glm_large_ch/glm_large_ch.py b/flagai/data/tokenizer/glm_large_ch/glm_large_ch.py index 7853c688..23c69e81 100644 --- a/flagai/data/tokenizer/glm_large_ch/glm_large_ch.py +++ b/flagai/data/tokenizer/glm_large_ch/glm_large_ch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") """ from https://github.com/openai/gpt-2/, changed for chinese """ diff --git a/flagai/data/tokenizer/glm_large_ch/glm_large_ch_tokenizer.py b/flagai/data/tokenizer/glm_large_ch/glm_large_ch_tokenizer.py index 998fd20d..23e27cb9 100644 --- a/flagai/data/tokenizer/glm_large_ch/glm_large_ch_tokenizer.py +++ b/flagai/data/tokenizer/glm_large_ch/glm_large_ch_tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/data/tokenizer/glm_large_en/glm_large_en_tokenizer.py b/flagai/data/tokenizer/glm_large_en/glm_large_en_tokenizer.py index d1dd28df..dbc295fa 100644 --- a/flagai/data/tokenizer/glm_large_en/glm_large_en_tokenizer.py +++ b/flagai/data/tokenizer/glm_large_en/glm_large_en_tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/data/tokenizer/glm_large_en/wordpiece.py b/flagai/data/tokenizer/glm_large_en/wordpiece.py index 49a021d2..2e54be28 100644 --- a/flagai/data/tokenizer/glm_large_en/wordpiece.py +++ b/flagai/data/tokenizer/glm_large_en/wordpiece.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # diff --git a/flagai/data/tokenizer/roberta/roberta_tokenizer.py b/flagai/data/tokenizer/roberta/roberta_tokenizer.py index ad118c6f..155edf7e 100644 --- a/flagai/data/tokenizer/roberta/roberta_tokenizer.py +++ b/flagai/data/tokenizer/roberta/roberta_tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/data/tokenizer/t5/t5_pegasus_tokenizer.py b/flagai/data/tokenizer/t5/t5_pegasus_tokenizer.py index 87cc7020..9692743b 100644 --- a/flagai/data/tokenizer/t5/t5_pegasus_tokenizer.py +++ b/flagai/data/tokenizer/t5/t5_pegasus_tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.data.tokenizer.bert.wordpiece import BertTokenizer as Tokenizer from transformers import BertTokenizer import jieba diff --git a/flagai/data/tokenizer/t5/t5_tokenizer.py b/flagai/data/tokenizer/t5/t5_tokenizer.py index 97ac7d4c..f8335364 100644 --- a/flagai/data/tokenizer/t5/t5_tokenizer.py +++ b/flagai/data/tokenizer/t5/t5_tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/data/tokenizer/tokenizer.py b/flagai/data/tokenizer/tokenizer.py index 8bec4e8c..557f37b4 100644 --- a/flagai/data/tokenizer/tokenizer.py +++ b/flagai/data/tokenizer/tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/fp16/fp16.py b/flagai/fp16/fp16.py index 435e8012..87e262c2 100644 --- a/flagai/fp16/fp16.py +++ b/flagai/fp16/fp16.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/fp16/fp16util.py b/flagai/fp16/fp16util.py index 4a4b1837..bb02f5c4 100644 --- a/flagai/fp16/fp16util.py +++ b/flagai/fp16/fp16util.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/fp16/loss_scaler.py b/flagai/fp16/loss_scaler.py index 1c310dcb..93f7cd85 100644 --- a/flagai/fp16/loss_scaler.py +++ b/flagai/fp16/loss_scaler.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/launch.py b/flagai/launch.py index 4212b1b7..3fe1d989 100644 --- a/flagai/launch.py +++ b/flagai/launch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # Copyright 2020 The Microsoft DeepSpeed Team """ sailing runner is the main front-end to launching multi-worker diff --git a/flagai/logger.py b/flagai/logger.py index 61ea1045..42558f9d 100644 --- a/flagai/logger.py +++ b/flagai/logger.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import logging import sys import os diff --git a/flagai/metrics.py b/flagai/metrics.py index 2e51d334..d5a27f3c 100644 --- a/flagai/metrics.py +++ b/flagai/metrics.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch import re from sklearn.metrics import f1_score diff --git a/flagai/model/base_model.py b/flagai/model/base_model.py index 56fe3ea7..345e3cd3 100644 --- a/flagai/model/base_model.py +++ b/flagai/model/base_model.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from sklearn.linear_model import HuberRegressor from torch.nn import Module import torch diff --git a/flagai/model/bert_model.py b/flagai/model/bert_model.py index 3ab2974c..075f6004 100644 --- a/flagai/model/bert_model.py +++ b/flagai/model/bert_model.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/model/blocks/bert_block.py b/flagai/model/blocks/bert_block.py index 6ce58c9e..0fc0721d 100644 --- a/flagai/model/blocks/bert_block.py +++ b/flagai/model/blocks/bert_block.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/model/blocks/glm_block.py b/flagai/model/blocks/glm_block.py index b66f2adc..c2458606 100644 --- a/flagai/model/blocks/glm_block.py +++ b/flagai/model/blocks/glm_block.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/model/blocks/gpt2_block.py b/flagai/model/blocks/gpt2_block.py index ca8c4041..62925a08 100644 --- a/flagai/model/blocks/gpt2_block.py +++ b/flagai/model/blocks/gpt2_block.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.model.layers.attentions import GPT2Attention from flagai.model.layers.feedforward import GPT2MLP from torch import nn diff --git a/flagai/model/blocks/t5_block.py b/flagai/model/blocks/t5_block.py index 8569d061..a9d89458 100644 --- a/flagai/model/blocks/t5_block.py +++ b/flagai/model/blocks/t5_block.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from torch import nn diff --git a/flagai/model/config.py b/flagai/model/config.py index 3176a243..e61f7f0a 100644 --- a/flagai/model/config.py +++ b/flagai/model/config.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") """ enc_dec model configuration """ import json diff --git a/flagai/model/file_utils.py b/flagai/model/file_utils.py index 4b4bff0d..df31b66a 100644 --- a/flagai/model/file_utils.py +++ b/flagai/model/file_utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from time import sleep import requests diff --git a/flagai/model/glm_model.py b/flagai/model/glm_model.py index 4e2749e2..f66d87fe 100644 --- a/flagai/model/glm_model.py +++ b/flagai/model/glm_model.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/model/gpt2_model.py b/flagai/model/gpt2_model.py index 3bb3bd83..ce66b2a5 100644 --- a/flagai/model/gpt2_model.py +++ b/flagai/model/gpt2_model.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch import torch.nn as nn import os diff --git a/flagai/model/layers/activations.py b/flagai/model/layers/activations.py index 7bc0fd08..308ede0a 100644 --- a/flagai/model/layers/activations.py +++ b/flagai/model/layers/activations.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # Copyright 2020 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/flagai/model/layers/attentions.py b/flagai/model/layers/attentions.py index ba0ea828..2b24af30 100644 --- a/flagai/model/layers/attentions.py +++ b/flagai/model/layers/attentions.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/model/layers/crf.py b/flagai/model/layers/crf.py index 412c43a9..9f337f51 100644 --- a/flagai/model/layers/crf.py +++ b/flagai/model/layers/crf.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch import torch.nn as nn import torch.nn.functional as F diff --git a/flagai/model/layers/embeddings.py b/flagai/model/layers/embeddings.py index 372ad88b..4d97b326 100644 --- a/flagai/model/layers/embeddings.py +++ b/flagai/model/layers/embeddings.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/model/layers/feedforward.py b/flagai/model/layers/feedforward.py index 1bae1f50..eaac43a5 100644 --- a/flagai/model/layers/feedforward.py +++ b/flagai/model/layers/feedforward.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # feedforward import os import torch diff --git a/flagai/model/layers/global_pointer.py b/flagai/model/layers/global_pointer.py index a42d0ed3..5433a271 100644 --- a/flagai/model/layers/global_pointer.py +++ b/flagai/model/layers/global_pointer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch import torch.nn as nn diff --git a/flagai/model/layers/layer_norm.py b/flagai/model/layers/layer_norm.py index 413f6550..1a855f3b 100644 --- a/flagai/model/layers/layer_norm.py +++ b/flagai/model/layers/layer_norm.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # layer norm import torch diff --git a/flagai/model/predictor/predictor.py b/flagai/model/predictor/predictor.py index 8f4d2151..487665b7 100644 --- a/flagai/model/predictor/predictor.py +++ b/flagai/model/predictor/predictor.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import numpy as np import torch import torch.nn.functional as F diff --git a/flagai/model/predictor/utils.py b/flagai/model/predictor/utils.py index 6f9cddae..ab62def4 100644 --- a/flagai/model/predictor/utils.py +++ b/flagai/model/predictor/utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import json import os from typing import List diff --git a/flagai/model/prompt.py b/flagai/model/prompt.py index 893432f3..5044a2aa 100644 --- a/flagai/model/prompt.py +++ b/flagai/model/prompt.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import random import torch diff --git a/flagai/model/t5_model.py b/flagai/model/t5_model.py index f6f9e237..dac5f934 100644 --- a/flagai/model/t5_model.py +++ b/flagai/model/t5_model.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright 2018 Mesh TensorFlow authors, T5 Authors and HuggingFace Inc. team. # diff --git a/flagai/model/utils.py b/flagai/model/utils.py index 3b9c2e0d..b37103c0 100644 --- a/flagai/model/utils.py +++ b/flagai/model/utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import math import torch diff --git a/flagai/mp_tools.py b/flagai/mp_tools.py index 742f4af9..624da69f 100644 --- a/flagai/mp_tools.py +++ b/flagai/mp_tools.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import sys import os import torch diff --git a/flagai/mp_utils.py b/flagai/mp_utils.py index c9a0e0fc..ba78ef8b 100644 --- a/flagai/mp_utils.py +++ b/flagai/mp_utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import sys import os import torch diff --git a/flagai/mpu/cross_entropy.py b/flagai/mpu/cross_entropy.py index 567d82e9..2b8ba008 100644 --- a/flagai/mpu/cross_entropy.py +++ b/flagai/mpu/cross_entropy.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/mpu/data.py b/flagai/mpu/data.py index f8d510e0..58a802e3 100644 --- a/flagai/mpu/data.py +++ b/flagai/mpu/data.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/mpu/func_utils.py b/flagai/mpu/func_utils.py index f9eba243..18cfbd65 100644 --- a/flagai/mpu/func_utils.py +++ b/flagai/mpu/func_utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/mpu/grads.py b/flagai/mpu/grads.py index 63b9620e..16cec15a 100644 --- a/flagai/mpu/grads.py +++ b/flagai/mpu/grads.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/mpu/initialize.py b/flagai/mpu/initialize.py index ea36988c..1b9afa61 100644 --- a/flagai/mpu/initialize.py +++ b/flagai/mpu/initialize.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/mpu/local_attention_function.py b/flagai/mpu/local_attention_function.py index 50f9d6e7..5d8f22ee 100644 --- a/flagai/mpu/local_attention_function.py +++ b/flagai/mpu/local_attention_function.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from torch import nn import torch.nn.functional as F diff --git a/flagai/mpu/mappings.py b/flagai/mpu/mappings.py index 1f5fa9d9..5c76be09 100644 --- a/flagai/mpu/mappings.py +++ b/flagai/mpu/mappings.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/mpu/random.py b/flagai/mpu/random.py index 82aed5b0..653e6b61 100644 --- a/flagai/mpu/random.py +++ b/flagai/mpu/random.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 #Modified by Samyam Rajbhandari #Used to partition the activations stored for backward propagation diff --git a/flagai/mpu/utils.py b/flagai/mpu/utils.py index 88e9ae41..9aec39f5 100644 --- a/flagai/mpu/utils.py +++ b/flagai/mpu/utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/optimizers.py b/flagai/optimizers.py index 4b33427b..166e1f65 100644 --- a/flagai/optimizers.py +++ b/flagai/optimizers.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch try: from apex.optimizers import FusedAdam as Adam diff --git a/flagai/schedulers.py b/flagai/schedulers.py index 5c717c19..0267bc8a 100644 --- a/flagai/schedulers.py +++ b/flagai/schedulers.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai/test_utils.py b/flagai/test_utils.py index f77343f7..397dd9f3 100644 --- a/flagai/test_utils.py +++ b/flagai/test_utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") def build_input_from_ids(text_a_ids=None, text_b_ids=None, answer_ids=None, diff --git a/flagai/trainer.py b/flagai/trainer.py index 3305661a..c32d8e06 100644 --- a/flagai/trainer.py +++ b/flagai/trainer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # Arguments for training try: import deepspeed.utils diff --git a/flagai/utils.py b/flagai/utils.py index 2af09f3f..be2c39c5 100644 --- a/flagai/utils.py +++ b/flagai/utils.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # diff --git a/flagai_wechat.png b/flagai_wechat.png new file mode 100644 index 00000000..387bded2 Binary files /dev/null and b/flagai_wechat.png differ diff --git a/logo.png b/logo.png index 0136a04e..501e744d 100644 Binary files a/logo.png and b/logo.png differ diff --git a/quickstart/glm_blank_filling_QA_ch.py b/quickstart/glm_blank_filling_QA_ch.py index d3fe4f83..13f183ab 100644 --- a/quickstart/glm_blank_filling_QA_ch.py +++ b/quickstart/glm_blank_filling_QA_ch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") # coding=utf-8 # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # @@ -29,11 +32,12 @@ # Random seeds for reproducability. # Model, model = GLMModel.from_pretrain(model_name='GLM-large-ch', download_path="./state_dict/") - tokenizer = GLMLargeChTokenizer(vocab_path='./state_dict/GLM-large-ch/cog-pretrain.model', + #tokenizer = GLMLargeChTokenizer(vocab_path='./state_dict/GLM-large-ch/cog-pretrain.model', + tokenizer = GLMLargeChTokenizer( add_block_symbols=True, add_task_mask=True, add_decoder_mask=False, - fix_command_token=False) + fix_command_token=True) model.cuda(torch.cuda.current_device()) diff --git a/quickstart/glm_title_ch.py b/quickstart/glm_title_ch.py index 7c260ee0..882d68bc 100644 --- a/quickstart/glm_title_ch.py +++ b/quickstart/glm_title_ch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/quickstart/ner_ch.py b/quickstart/ner_ch.py index 0d5923fa..04ede832 100644 --- a/quickstart/ner_ch.py +++ b/quickstart/ner_ch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/quickstart/title_ch.py b/quickstart/title_ch.py index f0c41f40..4a9531be 100644 --- a/quickstart/title_ch.py +++ b/quickstart/title_ch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/quickstart/title_en.py b/quickstart/title_en.py index 6f0cbf9b..4b59b1a8 100644 --- a/quickstart/title_en.py +++ b/quickstart/title_en.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import torch from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/quickstart/writing_ch.py b/quickstart/writing_ch.py index ab8331a0..77f1a0be 100644 --- a/quickstart/writing_ch.py +++ b/quickstart/writing_ch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor diff --git a/setup.py b/setup.py index 0317767e..32ad719f 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from setuptools import find_packages, setup with open("requirements.txt") as f: @@ -5,11 +8,11 @@ setup( name="flagai", - version="v1.0.0-beta6", + version="v1.0.0-beta7", description="FlagAI aims to help researchers and developers to freely train and test large-scale models for NLP tasks.", long_description=open("README.md", encoding="utf-8").read(), long_description_content_type="text/markdown", - author="BAAI Open", + author="BAAI-Open", author_email="liuguang@baai.ac.cn", url="https://github.com/BAAI-Open/FlagAI", packages=find_packages(exclude="tests"), # same as name diff --git a/test.py b/test.py index a4373d7d..d03d28be 100644 --- a/test.py +++ b/test.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import unittest test_dir = './tests' diff --git a/tests/bak_test_autoloader.py b/tests/bak_test_autoloader.py index 3dcfeeaa..8b7221d6 100644 --- a/tests/bak_test_autoloader.py +++ b/tests/bak_test_autoloader.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.auto_model.auto_loader import AutoLoader import unittest diff --git a/tests/bak_test_glm_seq2seq.py b/tests/bak_test_glm_seq2seq.py index a4b7a5ce..998cd9f0 100644 --- a/tests/bak_test_glm_seq2seq.py +++ b/tests/bak_test_glm_seq2seq.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.trainer import Trainer from flagai.model.glm_model import GLMForSeq2Seq from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer diff --git a/tests/bak_test_glm_superglue.py b/tests/bak_test_glm_superglue.py index a6db722a..f026f5e2 100644 --- a/tests/bak_test_glm_superglue.py +++ b/tests/bak_test_glm_superglue.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.trainer import Trainer from flagai.model.glm_model import GLMModel, GLMForSequenceClassification, GLMForSingleTokenCloze, GLMForMultiTokenCloze, GLMForMultiTokenClozeFast from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer, GLMLargeChTokenizer diff --git a/tests/test_bert.py b/tests/test_bert.py index 25f27cd8..fc3386b3 100644 --- a/tests/test_bert.py +++ b/tests/test_bert.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.auto_model.auto_loader import AutoLoader from flagai.model.predictor.predictor import Predictor import torch diff --git a/tests/test_glm_large_ch.py b/tests/test_glm_large_ch.py index da0a99f3..b8fcabfa 100644 --- a/tests/test_glm_large_ch.py +++ b/tests/test_glm_large_ch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.model.predictor.predictor import Predictor import torch from flagai.model.glm_model import GLMForSeq2Seq diff --git a/tests/test_gpt2_ch.py b/tests/test_gpt2_ch.py index 8e71135a..cd67f5c1 100644 --- a/tests/test_gpt2_ch.py +++ b/tests/test_gpt2_ch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.model.gpt2_model import GPT2Model from flagai.data.tokenizer.bert.bert_tokenizer import BertTokenizer import torch diff --git a/tests/test_t5_ch.py b/tests/test_t5_ch.py index 3dfc488e..6ae84bc1 100644 --- a/tests/test_t5_ch.py +++ b/tests/test_t5_ch.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") from flagai.model.t5_model import T5Model from flagai.data.tokenizer.t5.t5_pegasus_tokenizer import T5PegasusTokenizer from flagai.model.predictor.predictor import Predictor diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 2162a156..5900edcd 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -1,3 +1,6 @@ +# Copyright © 2022 BAAI. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License") import unittest from flagai.data.tokenizer import GLMLargeChTokenizer from flagai.data.tokenizer import GLMLargeEnWordPieceTokenizer