-
Notifications
You must be signed in to change notification settings - Fork 115
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
206 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
import torch | ||
from datasets import load_dataset | ||
from mmengine.dataset import DefaultSampler | ||
from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook, | ||
LoggerHook, ParamSchedulerHook) | ||
from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR | ||
from peft import LoraConfig | ||
from torch.optim import AdamW | ||
from transformers import (AutoModelForCausalLM, AutoTokenizer, | ||
BitsAndBytesConfig) | ||
|
||
from xtuner.dataset import process_hf_dataset | ||
from xtuner.dataset.collate_fns import default_collate_fn | ||
from xtuner.dataset.map_fns import template_map_fn_factory | ||
from xtuner.engine import DatasetInfoHook, EvaluateChatHook | ||
from xtuner.model import SupervisedFinetune | ||
from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE | ||
|
||
####################################################################### | ||
# PART 1 Settings # | ||
####################################################################### | ||
# Model | ||
# Local checkpoint path; passed as `pretrained_model_name_or_path` to both the tokenizer and the LLM specs below. | ||
pretrained_model_name_or_path = '/content/internlm2-chat-7b' | ||
# Data | ||
# Local JSON file used as the `train` split of the fine-tuning dataset. | ||
data_path = '/content/mother_v2_3838-pr.json' | ||
# Chat template applied to the training samples and to the periodic evaluation prompts. | ||
prompt_template = PROMPT_TEMPLATE.internlm2_chat | ||
# Forwarded to the dataset spec as `max_length` / `pack_to_max_length`. | ||
max_length = 2048 | ||
pack_to_max_length = True | ||
|
||
# Scheduler & Optimizer | ||
batch_size = 2 # per_device | ||
# Forwarded to the optimizer wrapper as `accumulative_counts` (gradient accumulation steps). | ||
accumulative_counts = 2 | ||
dataloader_num_workers = 0 | ||
max_epochs = 3 | ||
optim_type = AdamW | ||
lr = 2e-4 | ||
betas = (0.9, 0.999) | ||
weight_decay = 0 | ||
max_norm = 1 # grad clip | ||
# Fraction of `max_epochs` spent in the linear warm-up stage of the LR schedule. | ||
warmup_ratio = 0.03 | ||
|
||
# Evaluate the generation performance during the training | ||
evaluation_freq = 500 | ||
SYSTEM = f'''你是一个心理专家, 除了在心理方面拥有广博的知识储备和丰富的研究咨询经验, 还具有科学家的如下特质: | ||
1.客观理性:科学家会在处理感情问题时保持一定的客观和理性。例如,当他们遇到争执时,可能会试图从一个更客观的角度分析问题的根源,而不是让情绪主导。他们可能会提出具体的问题,试图理解双方的观点,并寻找基于逻辑和事实的解决方案。 | ||
2.深入探讨:科学家在对话中会展现出对深层次理解的追求。在与别人讨论话题时,他们可能不满足于表面的聊天,而是倾向于深入探讨背后的原因和动机。例如,当谈论到个人的兴趣或职业选择时,他们可能会好奇地询问为什么她做出这样的选择,以及这背后的心理动力是什么。 | ||
3.理性沟通:在遇到感情纠纷或误解时,科学家会倾向于通过理性的沟通来解决问题。他们可能会提倡开放和诚实的对话,鼓励双方表达自己的感受和观点,并尝试找到双方都能接受的解决方案。他们可能会避免使用指责的语言,而是努力理解对方的立场,并寻求共同的理解。 | ||
4.好奇心:在日常生活中,科学家会表现出对朋友生活的好奇心。他们可能对她的工作、爱好、或是过去的经历感兴趣,并愿意花时间去了解和探索。这种好奇心不仅可以增加双方的交流和了解,也能使关系更加丰富多彩。 | ||
5.在与他人交流时,科学家会注重清晰和精确的表达,有时会引用相关知识库和相关研究结果,有时会引用相关著作的内容来证明自己的观点。同时,他们也可能会倾听他人的观点,并以开放的心态接受不同的意见和反馈。 | ||
我现在有一些问题,请你解答: | ||
''' | ||
evaluation_inputs = [ | ||
'我最近总是感到很焦虑,尤其是在学业上。我有个特别崇拜的同学,他好像在各方面都比我优秀,我总觉得自己怎么努力也追不上他,这让我压力特别大。', '我知道应该理性看待,但就是忍不住会去比较。我甚至晚上会因为这个睡不着觉,总想着怎样才能像他那样出色。' | ||
] | ||
|
||
|
||
|
||
####################################################################### | ||
# PART 2 Model & Tokenizer # | ||
####################################################################### | ||
# Tokenizer spec: `type` names the factory and the remaining keys are its keyword arguments. | ||
# The same dict is reused by the dataset spec and by the custom hooks further down. | ||
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to run; only use trusted checkpoints. | ||
tokenizer = dict( | ||
type=AutoTokenizer.from_pretrained, | ||
pretrained_model_name_or_path=pretrained_model_name_or_path, | ||
trust_remote_code=True, | ||
padding_side='right') | ||
|
||
# Supervised fine-tuning model spec: a 4-bit quantized base LLM (`llm`) plus a LoRA adapter config (`lora`). | ||
model = dict( | ||
type=SupervisedFinetune, | ||
llm=dict( | ||
type=AutoModelForCausalLM.from_pretrained, | ||
pretrained_model_name_or_path=pretrained_model_name_or_path, | ||
trust_remote_code=True, | ||
torch_dtype=torch.float16, | ||
# bitsandbytes settings: 4-bit NF4 weights with double quantization, float16 compute dtype. | ||
# 8-bit loading is explicitly disabled; the llm_int8_* values are still passed alongside. | ||
quantization_config=dict( | ||
type=BitsAndBytesConfig, | ||
load_in_4bit=True, | ||
load_in_8bit=False, | ||
llm_int8_threshold=6.0, | ||
llm_int8_has_fp16_weight=False, | ||
bnb_4bit_compute_dtype=torch.float16, | ||
bnb_4bit_use_double_quant=True, | ||
bnb_4bit_quant_type='nf4')), | ||
# LoRA adapter: rank 64, alpha 16, dropout 0.1, no bias terms trained, causal-LM task type. | ||
lora=dict( | ||
type=LoraConfig, | ||
r=64, | ||
lora_alpha=16, | ||
lora_dropout=0.1, | ||
bias='none', | ||
task_type='CAUSAL_LM')) | ||
|
||
####################################################################### | ||
# PART 3 Dataset & Dataloader # | ||
####################################################################### | ||
# Training dataset spec: loads the local JSON file at `data_path` as the `train` split and | ||
# processes it with the tokenizer and prompt template defined above. | ||
# NOTE(review): despite the name `alpaca_en`, no Alpaca dataset is referenced here; the name | ||
# looks like a leftover from a template config. It is consumed by `train_dataloader` below. | ||
alpaca_en = dict( | ||
type=process_hf_dataset, | ||
dataset=dict(type=load_dataset, path='json', data_files=dict(train=data_path)), | ||
tokenizer=tokenizer, | ||
max_length=max_length, | ||
# No dataset-specific map function is applied; presumably the JSON is already in the | ||
# conversation format xtuner expects - TODO confirm against the data file. | ||
dataset_map_fn=None, | ||
template_map_fn=dict( | ||
type=template_map_fn_factory, template=prompt_template), | ||
remove_unused_columns=True, | ||
shuffle_before_pack=True, | ||
pack_to_max_length=pack_to_max_length) | ||
|
||
# Training dataloader spec: per-device batch size and worker count come from PART 1; | ||
# samples are drawn in shuffled order and batched with xtuner's default collate function. | ||
train_dataloader = dict( | ||
batch_size=batch_size, | ||
num_workers=dataloader_num_workers, | ||
dataset=alpaca_en, | ||
sampler=dict(type=DefaultSampler, shuffle=True), | ||
collate_fn=dict(type=default_collate_fn)) | ||
|
||
####################################################################### | ||
# PART 4 Scheduler & Optimizer # | ||
####################################################################### | ||
# optimizer | ||
# Mixed-precision optimizer wrapper: float16 with dynamic loss scaling, gradient clipping at | ||
# `max_norm`, and gradient accumulation over `accumulative_counts` steps. | ||
optim_wrapper = dict( | ||
type=AmpOptimWrapper, | ||
optimizer=dict( | ||
type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay), | ||
# error_if_nonfinite=False: a non-finite gradient norm does not raise during clipping. | ||
clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False), | ||
accumulative_counts=accumulative_counts, | ||
loss_scale='dynamic', | ||
dtype='float16') | ||
|
||
# learning policy | ||
# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501 | ||
# Two stages, both specified in epochs and converted to iteration-based schedules: | ||
# 1. linear warm-up from epoch 0 to `warmup_ratio * max_epochs`; | ||
# 2. cosine annealing towards 0 starting where the warm-up ends. | ||
param_scheduler = [ | ||
dict( | ||
type=LinearLR, | ||
start_factor=1e-5, | ||
by_epoch=True, | ||
begin=0, | ||
end=warmup_ratio * max_epochs, | ||
convert_to_iter_based=True), | ||
# NOTE(review): this stage sets `T_max=max_epochs` but no `end`, while it begins after the | ||
# warm-up; confirm whether `end=max_epochs` was intended so the cosine finishes with training. | ||
dict( | ||
type=CosineAnnealingLR, | ||
eta_min=0.0, | ||
by_epoch=True, | ||
begin=warmup_ratio * max_epochs, | ||
T_max=max_epochs, | ||
convert_to_iter_based=True) | ||
] | ||
|
||
# train, val, test setting | ||
# Epoch-based training loop running for `max_epochs` epochs. | ||
# NOTE(review): `val_interval=1` is set although no validation dataloader is defined in the visible config - confirm it is needed. | ||
train_cfg = dict(by_epoch=True, max_epochs=max_epochs, val_interval=1) | ||
|
||
####################################################################### | ||
# PART 5 Runtime # | ||
####################################################################### | ||
# Log the dialogue periodically during the training process, optional | ||
# DatasetInfoHook receives the tokenizer; EvaluateChatHook is configured to run every | ||
# `evaluation_freq` iterations on `evaluation_inputs`, using the `SYSTEM` prompt and the | ||
# same chat template as training. | ||
custom_hooks = [ | ||
dict(type=DatasetInfoHook, tokenizer=tokenizer), | ||
dict( | ||
type=EvaluateChatHook, | ||
tokenizer=tokenizer, | ||
every_n_iters=evaluation_freq, | ||
evaluation_inputs=evaluation_inputs, | ||
system=SYSTEM, | ||
prompt_template=prompt_template) | ||
] | ||
|
||
# configure default hooks | ||
default_hooks = dict( | ||
# record the time of every iteration. | ||
timer=dict(type=IterTimerHook), | ||
# print log every 10 iterations. | ||
logger=dict(type=LoggerHook, interval=10), | ||
# enable the parameter scheduler. | ||
param_scheduler=dict(type=ParamSchedulerHook), | ||
# save checkpoint per epoch. | ||
checkpoint=dict(type=CheckpointHook, interval=1), | ||
# set sampler seed in distributed environment. | ||
sampler_seed=dict(type=DistSamplerSeedHook), | ||
) | ||
|
||
# configure environment | ||
env_cfg = dict( | ||
# whether to enable cudnn benchmark | ||
cudnn_benchmark=False, | ||
# set multi process parameters | ||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), | ||
# set distributed parameters | ||
dist_cfg=dict(backend='nccl'), | ||
) | ||
|
||
# set visualizer | ||
visualizer = None | ||
|
||
# set log level | ||
log_level = 'INFO' | ||
|
||
# load from which checkpoint | ||
load_from = None | ||
|
||
# whether to resume training from the loaded checkpoint | ||
resume = False | ||
|
||
# Defaults to use random seed and disable `deterministic` | ||
randomness = dict(seed=None, deterministic=False) | ||
|
||
# Launch command: xtuner train internlm2_7b_chat_qlora_e3_mother.py --deepspeed deepspeed_zero2 | ||
|