Develop (#66)

* fix multi_gpu_training bug

* Update trainer.py

remove comment

* changed the version

Signed-off-by: Anhforth <yanzhaodong2021@163.com>

* Update trainer.py

* fix_validation_bug (#24)

* updated the version

Signed-off-by: Anhforth <yanzhaodong2021@163.com>
BAAI-OpenPlatform committed Jul 11, 2022
1 parent 3e66bf8 commit 9fb44cf
Showing 5 changed files with 26 additions and 20 deletions.
15 changes: 6 additions & 9 deletions examples/glm_poetry_generation/train.py
@@ -10,8 +10,6 @@
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 src_dir = cur_dir + '/data/src.txt'
 tgt_dir = cur_dir + '/data/tgt.txt'
-model_dir = "./state_dict/" # ./state_dict/roberta/ # model location
-

 def read_file():
     src = []
@@ -35,14 +33,12 @@ def read_file():
     return src, tgt


-auto_loader = AutoLoader("seq2seq",
-                         model_name="GLM-large-ch",
-                         model_dir=model_dir)
+auto_loader = AutoLoader("lm",
+                         model_name="GLM-large-ch")
 model = auto_loader.get_model()
 tokenizer = auto_loader.get_tokenizer()
-# Custom model and tokenizer:
-# model = GLMForSeq2Seq.from_pretrain(download_path=model_dir,model_name='GLM-large-ch')
-# tokenizer = GLMLargeChTokenizer()


 trainer = Trainer(
     env_type="pytorch", #pytorch or deepspeed
     experiment_name="glm_seq2seq",
@@ -66,6 +62,7 @@ def read_file():
     hostfile='./hostfile',
     deepspeed_config='./deepspeed.json',
     training_script=__file__,
+    model_parallel_size=8
 )


@@ -123,7 +120,7 @@ def __call__(self, batch):
             loss_mask[i] = self.pad_loss_mask(loss_mask[i], max_length)
         return {
             'input_ids': torch.LongTensor(input_ids),
-            'target_ids': torch.LongTensor(target_ids),
+            'labels': torch.LongTensor(target_ids),
             'position_ids': torch.LongTensor(position_ids),
             'attention_mask': torch.LongTensor(attention_mask),
             'loss_mask': torch.LongTensor(loss_mask)
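
Note: after this change the poetry example loads GLM-large-ch through the generic "lm" task and no longer reads weights from a hard-coded model_dir. A minimal sketch of the new loading path (default download behavior assumed, not shown in this diff):

    from flagai.auto_model.auto_loader import AutoLoader

    # "lm" replaces the old "seq2seq" task name; the model_dir argument
    # is gone, so weights are resolved by the loader itself.
    auto_loader = AutoLoader("lm", model_name="GLM-large-ch")
    model = auto_loader.get_model()
    tokenizer = auto_loader.get_tokenizer()
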
2 changes: 1 addition & 1 deletion examples/glm_title_generation/train.py
@@ -125,7 +125,7 @@ def __call__(self, batch):
             loss_mask[i] = self.pad_loss_mask(loss_mask[i], max_length)
         return {
             'input_ids': torch.LongTensor(input_ids),
-            'target_ids': torch.LongTensor(target_ids),
+            'labels': torch.LongTensor(target_ids),
             'position_ids': torch.LongTensor(position_ids),
             'attention_mask': torch.LongTensor(attention_mask),
             'loss_mask': torch.LongTensor(loss_mask)
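
Note: both example collators now emit the key 'labels' in place of 'target_ids'; the tensor contents are unchanged. A minimal sketch of the renamed batch dict (standalone and hypothetical; the real examples build these lists inside a __call__ method with padding logic omitted here):

    import torch

    def collate(input_ids, target_ids, position_ids, attention_mask, loss_mask):
        # Only the dictionary key changes: 'labels' instead of 'target_ids'.
        return {
            'input_ids': torch.LongTensor(input_ids),
            'labels': torch.LongTensor(target_ids),
            'position_ids': torch.LongTensor(position_ids),
            'attention_mask': torch.LongTensor(attention_mask),
            'loss_mask': torch.LongTensor(loss_mask),
        }
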
10 changes: 7 additions & 3 deletions flagai/auto_model/auto_loader.py
@@ -73,7 +73,7 @@ def __getattr__(self, name):
"opt-13b-en": ["flagai.model.opt_model","OPTModel", "opt"],
"opt-30b-en": ["flagai.model.opt_model","OPTModel", "opt"],
"opt-66b-en": ["flagai.model.opt_model","OPTModel", "opt"],

"glm-10b-ch": ["flagai.model.glm_model", "GLMModel", "glm"],
}

TOKENIZER_DICT = {
@@ -89,6 +89,10 @@ def __getattr__(self, name):
"flagai.data.tokenizer.glm_large_en.glm_large_en_tokenizer",
"GLMLargeEnWordPieceTokenizer"
],
"glm-10b-ch": [
"flagai.data.tokenizer.glm_large_ch.glm_large_ch_tokenizer",
"GLMLargeChTokenizer"
],
"gpt2-base-ch": ["flagai.data.tokenizer.bert.bert_tokenizer", "BertTokenizer"],
"cpm-large-ch": ["flagai.data.tokenizer.cpm_1.cpm1_tokenizer", "CPMTokenizer"],
"opt-125m-en": ["flagai.data.tokenizer.opt.opt_en_tokenizer","OPTTokenizer"],
@@ -180,11 +184,11 @@ def __init__(self,
         model_id = _get_model_id(model_name)

         print("*"*20, task_name, model_id, model_name)
-        if model_name == 'glm-large-ch':
+        if "glm" in model_name and "ch" in model_name:
             vocab_file = os.path.join(download_path,'cog-pretrained.model')
             if not os.path.exists(vocab_file):
                 vocab_file = _get_vocab_path(download_path, "cog-pretrain.model", model_id)
-        elif model_name == "glm-large-en":
+        elif "glm" in model_name and "en" in model_name:
             vocab_file = "GLM-large-en"
         elif model_name == "cpm-large-ch":
             # two files to load
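
Note: vocabulary selection now matches GLM model names by substring rather than exact equality, which is what lets the newly registered glm-10b-ch reuse the same cog-pretrain sentencepiece files as glm-large-ch. A standalone sketch of the dispatch rule (simplified; _get_vocab_path and the cpm/other branches from the diff are omitted):

    import os

    def pick_vocab_file(model_name: str, download_path: str) -> str:
        # Any Chinese GLM variant (glm-large-ch, glm-10b-ch, ...) shares one
        # sentencepiece model; English GLM variants share the GLM-large-en vocab.
        if "glm" in model_name and "ch" in model_name:
            return os.path.join(download_path, 'cog-pretrained.model')
        elif "glm" in model_name and "en" in model_name:
            return "GLM-large-en"
        raise ValueError(f"no vocabulary rule for {model_name}")
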
16 changes: 10 additions & 6 deletions flagai/trainer.py
@@ -924,18 +924,22 @@ def evaluate(self,
             all_labels.append(labels)
             all_losses.append(lm_loss.view(1))

-        all_logits = torch.cat(all_logits, dim=0)
-        all_labels = torch.cat(all_labels, dim=0)
+        if len(self.metric_methods) != 0:
+            all_logits = torch.cat(all_logits, dim=0)
+            all_labels = torch.cat(all_labels, dim=0)
+
         all_losses = torch.cat(all_losses, dim=0)

         if self.env_type == 'pytorchDDP' or self.env_type == 'deepspeed':
-            all_logits = self._gather_all(all_logits)
-            all_labels = self._gather_all(all_labels)
+            if len(self.metric_methods) != 0:
+                all_logits = self._gather_all(all_logits)
+                all_labels = self._gather_all(all_labels)
             all_losses = self._gather_all(all_losses)

         elif self.env_type == 'deepspeed+mpu':
-            all_logits = self._gather_all_mpu(all_logits)
-            all_labels = self._gather_all_mpu(all_labels)
+            if len(self.metric_methods) != 0:
+                all_logits = self._gather_all_mpu(all_logits)
+                all_labels = self._gather_all_mpu(all_labels)
             all_losses = self._gather_all_mpu(all_losses)

         if all_losses.device != torch.device('cpu'):
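
Note: this is the validation fix referenced by fix_validation_bug (#24). evaluate() previously concatenated and all-gathered logits and labels unconditionally; when no metrics are registered those collections may be empty or unused, and torch.cat() on an empty list raises a RuntimeError. A self-contained illustration of the guard (variable setup here is hypothetical, not taken from the trainer):

    import torch

    metric_methods = []                      # loss-only validation, no metrics
    all_losses = [torch.tensor([0.7]), torch.tensor([0.4])]
    all_logits, all_labels = [], []          # nothing collected for metrics

    # Guarding on metric_methods skips torch.cat on lists that may be empty
    # and avoids gathering tensors nobody reads; loss is always reduced.
    if len(metric_methods) != 0:
        all_logits = torch.cat(all_logits, dim=0)
        all_labels = torch.cat(all_labels, dim=0)
    all_losses = torch.cat(all_losses, dim=0)
    print(all_losses.mean())                 # tensor(0.5500)
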
3 changes: 2 additions & 1 deletion setup.py
@@ -5,7 +5,8 @@

 setup(
     name="flagai",
-    version="v1.1.2",
+    version="v1.1.3",
+
     description="FlagAI aims to help researchers and developers to freely train and test large-scale models for NLP tasks.",
     long_description=open("README.md", encoding="utf-8").read(),
     long_description_content_type="text/markdown",