Develop (#66)

* fix multi_gpu_training bug

* Update trainer.py

remove comment

* changed the version

Signed-off-by: Anhforth <yanzhaodong2021@163.com>

* Update trainer.py

* fix_validation_bug (#24)

* updated the version

Signed-off-by: Anhforth <yanzhaodong2021@163.com>
BAAI-OpenPlatform committed Jul 11, 2022
1 parent 3e66bf8 commit 9fb44cf
Showing 5 changed files with 26 additions and 20 deletions.
15 changes: 6 additions & 9 deletions examples/glm_poetry_generation/train.py
@@ -10,8 +10,6 @@
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 src_dir = cur_dir + '/data/src.txt'
 tgt_dir = cur_dir + '/data/tgt.txt'
-model_dir = "./state_dict/" # ./state_dict/roberta/ # model location
-

 def read_file():
     src = []
@@ -35,14 +33,12 @@ def read_file():
     return src, tgt


-auto_loader = AutoLoader("seq2seq",
-                         model_name="GLM-large-ch",
-                         model_dir=model_dir)
+auto_loader = AutoLoader("lm",
+                         model_name="GLM-large-ch")
 model = auto_loader.get_model()
 tokenizer = auto_loader.get_tokenizer()
-# Custom model and tokenizer:
-# model = GLMForSeq2Seq.from_pretrain(download_path=model_dir,model_name='GLM-large-ch')
-# tokenizer = GLMLargeChTokenizer()


 trainer = Trainer(
     env_type="pytorch", #pytorch or deepspeed
     experiment_name="glm_seq2seq",
@@ -66,6 +62,7 @@ def read_file():
     hostfile='./hostfile',
     deepspeed_config='./deepspeed.json',
     training_script=__file__,
+    model_parallel_size=8
 )


@@ -123,7 +120,7 @@ def __call__(self, batch):
             loss_mask[i] = self.pad_loss_mask(loss_mask[i], max_length)
         return {
             'input_ids': torch.LongTensor(input_ids),
-            'target_ids': torch.LongTensor(target_ids),
+            'labels': torch.LongTensor(target_ids),
             'position_ids': torch.LongTensor(position_ids),
             'attention_mask': torch.LongTensor(attention_mask),
             'loss_mask': torch.LongTensor(loss_mask)
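
Note: after this change the poetry example loads GLM-large-ch through the generic "lm" task and no longer reads weights from a hard-coded model_dir. A minimal sketch of the new loading path (default download behavior assumed, not shown in this diff):

    from flagai.auto_model.auto_loader import AutoLoader

    # "lm" replaces the old "seq2seq" task name; the model_dir argument
    # is gone, so weights are resolved by the loader itself.
    auto_loader = AutoLoader("lm", model_name="GLM-large-ch")
    model = auto_loader.get_model()
    tokenizer = auto_loader.get_tokenizer()
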
2 changes: 1 addition & 1 deletion examples/glm_title_generation/train.py
@@ -125,7 +125,7 @@ def __call__(self, batch):
             loss_mask[i] = self.pad_loss_mask(loss_mask[i], max_length)
         return {
             'input_ids': torch.LongTensor(input_ids),
-            'target_ids': torch.LongTensor(target_ids),
+            'labels': torch.LongTensor(target_ids),
             'position_ids': torch.LongTensor(position_ids),
             'attention_mask': torch.LongTensor(attention_mask),
             'loss_mask': torch.LongTensor(loss_mask)
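
Note: both example collators now emit the key 'labels' in place of 'target_ids'; the tensor contents are unchanged. A minimal sketch of the renamed batch dict (standalone and hypothetical; the real examples build these lists inside a __call__ method with padding logic omitted here):

    import torch

    def collate(input_ids, target_ids, position_ids, attention_mask, loss_mask):
        # Only the dictionary key changes: 'labels' instead of 'target_ids'.
        return {
            'input_ids': torch.LongTensor(input_ids),
            'labels': torch.LongTensor(target_ids),
            'position_ids': torch.LongTensor(position_ids),
            'attention_mask': torch.LongTensor(attention_mask),
            'loss_mask': torch.LongTensor(loss_mask),
        }
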
10 changes: 7 additions & 3 deletions flagai/auto_model/auto_loader.py
@@ -73,7 +73,7 @@ def __getattr__(self, name):
"opt-13b-en": ["flagai.model.opt_model","OPTModel", "opt"],
"opt-30b-en": ["flagai.model.opt_model","OPTModel", "opt"],
"opt-66b-en": ["flagai.model.opt_model","OPTModel", "opt"],

"glm-10b-ch": ["flagai.model.glm_model", "GLMModel", "glm"],
}

TOKENIZER_DICT = {
@@ -89,6 +89,10 @@ def __getattr__(self, name):
"flagai.data.tokenizer.glm_large_en.glm_large_en_tokenizer",
"GLMLargeEnWordPieceTokenizer"
],
"glm-10b-ch": [
"flagai.data.tokenizer.glm_large_ch.glm_large_ch_tokenizer",
"GLMLargeChTokenizer"
],
"gpt2-base-ch": ["flagai.data.tokenizer.bert.bert_tokenizer", "BertTokenizer"],
"cpm-large-ch": ["flagai.data.tokenizer.cpm_1.cpm1_tokenizer", "CPMTokenizer"],
"opt-125m-en": ["flagai.data.tokenizer.opt.opt_en_tokenizer","OPTTokenizer"],
@@ -180,11 +184,11 @@ def __init__(self,
         model_id = _get_model_id(model_name)

         print("*"*20, task_name, model_id, model_name)
-        if model_name == 'glm-large-ch':
+        if "glm" in model_name and "ch" in model_name:
             vocab_file = os.path.join(download_path,'cog-pretrained.model')
             if not os.path.exists(vocab_file):
                 vocab_file = _get_vocab_path(download_path, "cog-pretrain.model", model_id)
-        elif model_name == "glm-large-en":
+        elif "glm" in model_name and "en" in model_name:
             vocab_file = "GLM-large-en"
         elif model_name == "cpm-large-ch":
             # two files to load
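
Note: vocabulary selection now matches GLM model names by substring rather than exact equality, which is what lets the newly registered glm-10b-ch reuse the same cog-pretrain sentencepiece files as glm-large-ch. A standalone sketch of the dispatch rule (simplified; _get_vocab_path and the cpm/other branches from the diff are omitted):

    import os

    def pick_vocab_file(model_name: str, download_path: str) -> str:
        # Any Chinese GLM variant (glm-large-ch, glm-10b-ch, ...) shares one
        # sentencepiece model; English GLM variants share the GLM-large-en vocab.
        if "glm" in model_name and "ch" in model_name:
            return os.path.join(download_path, 'cog-pretrained.model')
        elif "glm" in model_name and "en" in model_name:
            return "GLM-large-en"
        raise ValueError(f"no vocabulary rule for {model_name}")
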
16 changes: 10 additions & 6 deletions flagai/trainer.py
@@ -924,18 +924,22 @@ def evaluate(self,
             all_labels.append(labels)
             all_losses.append(lm_loss.view(1))

-        all_logits = torch.cat(all_logits, dim=0)
-        all_labels = torch.cat(all_labels, dim=0)
+        if len(self.metric_methods) != 0:
+            all_logits = torch.cat(all_logits, dim=0)
+            all_labels = torch.cat(all_labels, dim=0)
+
         all_losses = torch.cat(all_losses, dim=0)

         if self.env_type == 'pytorchDDP' or self.env_type == 'deepspeed':
-            all_logits = self._gather_all(all_logits)
-            all_labels = self._gather_all(all_labels)
+            if len(self.metric_methods) != 0:
+                all_logits = self._gather_all(all_logits)
+                all_labels = self._gather_all(all_labels)
             all_losses = self._gather_all(all_losses)

         elif self.env_type == 'deepspeed+mpu':
-            all_logits = self._gather_all_mpu(all_logits)
-            all_labels = self._gather_all_mpu(all_labels)
+            if len(self.metric_methods) != 0:
+                all_logits = self._gather_all_mpu(all_logits)
+                all_labels = self._gather_all_mpu(all_labels)
             all_losses = self._gather_all_mpu(all_losses)

         if all_losses.device != torch.device('cpu'):
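
Note: this is the validation fix referenced by fix_validation_bug (#24). evaluate() previously concatenated and all-gathered logits and labels unconditionally; when no metrics are registered those collections may be empty or unused, and torch.cat() on an empty list raises a RuntimeError. A self-contained illustration of the guard (variable setup here is hypothetical, not taken from the trainer):

    import torch

    metric_methods = []                      # loss-only validation, no metrics
    all_losses = [torch.tensor([0.7]), torch.tensor([0.4])]
    all_logits, all_labels = [], []          # nothing collected for metrics

    # Guarding on metric_methods skips torch.cat on lists that may be empty
    # and avoids gathering tensors nobody reads; loss is always reduced.
    if len(metric_methods) != 0:
        all_logits = torch.cat(all_logits, dim=0)
        all_labels = torch.cat(all_labels, dim=0)
    all_losses = torch.cat(all_losses, dim=0)
    print(all_losses.mean())                 # tensor(0.5500)
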
3 changes: 2 additions & 1 deletion setup.py
@@ -5,7 +5,8 @@

 setup(
     name="flagai",
-    version="v1.1.2",
+    version="v1.1.3",
+
     description="FlagAI aims to help researchers and developers to freely train and test large-scale models for NLP tasks.",
     long_description=open("README.md", encoding="utf-8").read(),
     long_description_content_type="text/markdown",