value_and_grad support attach grads, Parameter support accumulate a… #1833

Merged · 1 commit · Nov 22, 2024
@@ -2499,11 +2499,11 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from mindspore import value_and_grad\n",
"from mindnlp.core.autograd import value_and_grad\n",
"\n",
"def forward_fn(batch):\n",
" # get the inputs;\n",
@@ -2524,12 +2524,12 @@
" \n",
" return loss\n",
"\n",
"grad_fn = value_and_grad(forward_fn, None, model.trainable_params())"
"grad_fn = value_and_grad(forward_fn, model.trainable_params(), attach_grads=True)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": null,
"metadata": {
"tags": []
},
@@ -2824,9 +2824,11 @@
"for epoch in range(num_train_epochs): \n",
" print(\"Epoch:\", epoch)\n",
" for batch in tqdm(train_dataloader.create_dict_iterator()):\n",
" optimizer.zero_grad()\n",
" # forward, backward + optimize\n",
" loss, grads = grad_fn(batch)\n",
" optimizer.step(grads)\n",
" loss = grad_fn(batch)\n",
" optimizer.step()\n",
"\n",
"\n",
" # print loss every 100 steps\n",
" if global_step % 100 == 0:\n",
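For readers skimming the diff: the hunks above replace the MindSpore-style `value_and_grad(forward_fn, None, params)` call, which returned `(loss, grads)` that were then passed to `optimizer.step(grads)`, with `mindnlp.core.autograd.value_and_grad(..., attach_grads=True)`, which writes the gradients onto the parameters and returns only the loss. A minimal sketch of the new training step, assuming `model`, `optimizer`, and `train_dataloader` are already set up as in the notebook and condensing the body of `forward_fn`:

```python
from mindnlp.core.autograd import value_and_grad

def forward_fn(batch):
    # forward pass; the notebook's model is assumed to return an object with a .loss field
    outputs = model(**batch)
    return outputs.loss

# attach_grads=True: the wrapped function backpropagates and attaches gradients
# to model.trainable_params() instead of returning them alongside the loss
grad_fn = value_and_grad(forward_fn, model.trainable_params(), attach_grads=True)

for batch in train_dataloader.create_dict_iterator():
    optimizer.zero_grad()   # clear gradients accumulated on the parameters
    loss = grad_fn(batch)   # forward + backward; only the loss is returned
    optimizer.step()        # update from the attached gradients (no grads argument)
```

The `zero_grad()` / `step()` pairing mirrors a PyTorch-style training loop, which appears to be the point of attaching gradients to the parameters.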
248 changes: 165 additions & 83 deletions examples/classification/bert_emotect_finetune.ipynb

Large diffs are not rendered by default.

@@ -245,7 +245,7 @@
"metadata": {},
"outputs": [],
"source": [
"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.core import optim\n",
"# Setting up optimizer and learning rate scheduler\n",
"optimizer = optim.AdamW(model.trainable_params(), lr=1e-3)\n",
@@ -263,7 +263,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -423,18 +423,15 @@
}
],
"source": [
"from mindnlp.core import value_and_grad\n",
"# Forward function to compute the loss\n",
"def forward_fn(**batch):\n",
" outputs = model(**batch)\n",
" loss = outputs.loss\n",
" return loss\n",
"\n",
"# Gradient function to compute gradients for optimization\n",
"grad_fn = mindspore.value_and_grad(forward_fn, None, model.trainable_params())\n",
"# Define the training step function\n",
"def train_step(**batch):\n",
" loss,grads = grad_fn(**batch)\n",
" return loss,grads\n",
"grad_fn = value_and_grad(forward_fn, model.trainable_params(), attach_grads=True)\n",
"\n",
"from mindspore import ops\n",
"global_step = 0\n",
@@ -444,8 +441,9 @@
" train_total_size = train_dataset.get_dataset_size()\n",
" # Iterate over each entry in the training dataset\n",
" for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)): \n",
" loss,grads = train_step(**batch)\n",
" optimizer.step(grads)\n",
" optimizer.zero_grad()\n",
" loss = grad_fn(**batch)\n",
" optimizer.step()\n",
" total_loss += loss.float() # Accumulate loss for monitoring\n",
" lr_scheduler.step() # Update learning rate based on scheduler\n",
" # model.base_model.update_and_allocate(global_step,grads)\n",
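The hunks above also move `get_linear_schedule_with_warmup` from `mindnlp.transformers.optimization` to `mindnlp.common.optimization`. A short sketch of the optimizer-plus-scheduler setup under the new import path, assuming the scheduler keeps the usual `(optimizer, num_warmup_steps, num_training_steps)` signature and that `model`, `train_dataset`, and `num_epochs` are defined as in the notebooks:

```python
from mindnlp.core import optim
from mindnlp.common.optimization import get_linear_schedule_with_warmup

# optimizer over the trainable parameters, as in the diff above
optimizer = optim.AdamW(model.trainable_params(), lr=1e-3)

# one scheduler step per optimizer step, so the total is steps-per-epoch * epochs
num_training_steps = num_epochs * train_dataset.get_dataset_size()
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,          # no warmup here; adjust as needed
    num_training_steps=num_training_steps,
)
```

In the loops shown in these diffs, `lr_scheduler.step()` is then called once per batch, right after `optimizer.step()`.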
16 changes: 7 additions & 9 deletions llm/peft/dora/dora_finetuning_mindnlp_mt0.ipynb
@@ -28,7 +28,7 @@
"from mindnlp.core import ops\n",
"\n",
"from mindnlp.transformers import AutoTokenizer\n",
"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
"from tqdm import tqdm\n",
"\n",
"model_name_or_path = \"bigscience/mt0-small\"\n",
@@ -246,7 +246,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": null,
"id": "6b3a4090",
"metadata": {},
"outputs": [
@@ -304,25 +304,23 @@
}
],
"source": [
"from mindnlp.core import value_and_grad\n",
"# training and evaluation\n",
"def forward_fn(**batch):\n",
" outputs = model(**batch)\n",
" loss = outputs.loss\n",
" return loss\n",
"\n",
"grad_fn = mindspore.value_and_grad(forward_fn, None, model.trainable_params())\n",
"\n",
"def train_step(**batch):\n",
" loss, grads = grad_fn(**batch)\n",
" optimizer.step(grads)\n",
" return loss\n",
"grad_fn = value_and_grad(forward_fn, model.trainable_params(), attach_grads=True)\n",
"\n",
"for epoch in range(num_epochs):\n",
" model.set_train()\n",
" total_loss = 0\n",
" train_total_size = train_dataset.get_dataset_size()\n",
" for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
" loss = train_step(**batch)\n",
" optimizer.zero_grad()\n",
" loss = grad_fn(**batch)\n",
" optimizer.step()\n",
" total_loss += loss.float()\n",
" lr_scheduler.step()\n",
"\n",
16 changes: 7 additions & 9 deletions llm/peft/ia3/seq_2_seq/peft_ia3_mindnlp.ipynb
@@ -28,7 +28,7 @@
"from mindnlp.core import ops\n",
"\n",
"from mindnlp.transformers import AutoTokenizer\n",
"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
"from tqdm import tqdm\n",
"\n",
"model_name_or_path = \"bigscience/mt0-small\"\n",
@@ -229,7 +229,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "6b3a4090",
"metadata": {},
"outputs": [
@@ -287,25 +287,23 @@
}
],
"source": [
"from mindnlp.core import value_and_grad\n",
"# training and evaluation\n",
"def forward_fn(**batch):\n",
" outputs = model(**batch)\n",
" loss = outputs.loss\n",
" return loss\n",
"\n",
"grad_fn = mindspore.value_and_grad(forward_fn, None, model.trainable_params())\n",
"\n",
"def train_step(**batch):\n",
" loss, grads = grad_fn(**batch)\n",
" optimizer.step(grads)\n",
" return loss\n",
"grad_fn = value_and_grad(forward_fn, model.trainable_params())\n",
"\n",
"for epoch in range(num_epochs):\n",
" model.set_train()\n",
" total_loss = 0\n",
" train_total_size = train_dataset.get_dataset_size()\n",
" for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
" loss = train_step(**batch)\n",
" optimizer.zero_grad()\n",
" loss = grad_fn(**batch)\n",
" optimizer.step()\n",
" total_loss += loss.float()\n",
" lr_scheduler.step()\n",
"\n",
18 changes: 8 additions & 10 deletions llm/peft/ia3/sequence_classification.ipynb
@@ -47,7 +47,7 @@
"\n",
"import mindnlp.evaluate as evaluate\n",
"from mindnlp.dataset import load_dataset\n",
"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.transformers import AutoModelForSequenceClassification, AutoTokenizer\n"
]
},
@@ -527,7 +527,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"id": "fa0e73be",
"metadata": {
"ExecuteTime": {
@@ -746,24 +746,22 @@
}
],
"source": [
"from mindnlp.core import value_and_grad\n",
"\n",
"def forward_fn(**batch):\n",
" outputs = model(**batch)\n",
" loss = outputs.loss\n",
" return loss\n",
"\n",
"grad_fn = mindspore.value_and_grad(forward_fn, None, tuple(model.parameters()))\n",
"\n",
"def train_step(**batch):\n",
" loss, grads = grad_fn(**batch)\n",
" optimizer.step(grads)\n",
" return loss\n",
"\n",
"grad_fn = value_and_grad(forward_fn, tuple(model.parameters()))\n",
"\n",
"for epoch in range(num_epochs):\n",
" model.set_train()\n",
" train_total_size = train_dataset.get_dataset_size()\n",
" for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
" loss = train_step(**batch)\n",
" optimizer.zero_grad()\n",
" loss = grad_fn(**batch)\n",
" optimizer.step()\n",
" lr_scheduler.step()\n",
"\n",
" model.set_train(False)\n",
16 changes: 7 additions & 9 deletions llm/peft/lora/lora_seq2seq.ipynb
@@ -40,7 +40,7 @@
"from mindnlp.core import ops\n",
"\n",
"from mindnlp.transformers import AutoTokenizer\n",
"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
"from tqdm import tqdm\n",
"\n",
"model_name_or_path = \"bigscience/mt0-large\"\n",
@@ -318,7 +318,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"id": "6b3a4090",
"metadata": {},
"outputs": [
@@ -390,25 +390,23 @@
}
],
"source": [
"from mindnlp.core import value_and_grad\n",
"# training and evaluation\n",
"def forward_fn(**batch):\n",
" outputs = model(**batch)\n",
" loss = outputs.loss\n",
" return loss\n",
"\n",
"grad_fn = mindspore.value_and_grad(forward_fn, None, model.trainable_params())\n",
"\n",
"def train_step(**batch):\n",
" loss, grads = grad_fn(**batch)\n",
" optimizer.step(grads)\n",
" return loss\n",
"grad_fn = value_and_grad(forward_fn, model.trainable_params())\n",
"\n",
"for epoch in range(num_epochs):\n",
" model.set_train()\n",
" total_loss = 0\n",
" train_total_size = train_dataset.get_dataset_size()\n",
" for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
" loss = train_step(**batch)\n",
" optimizer.zero_grad()\n",
" loss = grad_fn(**batch)\n",
" optimizer.step()\n",
" total_loss += loss.float()\n",
" lr_scheduler.step()\n",
"\n",
@@ -228,7 +228,7 @@
],
"source": [
"from mindnlp.core import optim\n",
"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
"\n",
"module = MLP()\n",
"for name, param in module.named_parameters():\n",
@@ -319,7 +319,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"id": "1cfbbe7e",
"metadata": {},
"outputs": [
@@ -783,25 +783,25 @@
],
"source": [
"from tqdm import tqdm\n",
"from mindnlp.core import value_and_grad\n",
"\n",
"def forward_fn(**batch):\n",
" outputs = model(batch[\"input_ids\"])\n",
" loss = criterion(outputs, batch[\"labels\"])\n",
" return loss\n",
"\n",
"grad_fn = ms.value_and_grad(forward_fn, None, model.trainable_params())\n",
"grad_fn = value_and_grad(forward_fn, model.trainable_params())\n",
"\n",
"def train_step(**batch):\n",
" loss, grads = grad_fn(**batch)\n",
" optimizer.step(grads)\n",
" return loss\n",
"\n",
"for epoch in range(num_epochs):\n",
" model.set_train(True)\n",
" train_loss = 0\n",
" train_total_size = train_dataset.get_dataset_size()\n",
" for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
" loss = train_step(**batch)\n",
" optimizer.zero_grad()\n",
" loss = grad_fn(**batch)\n",
" optimizer.step()\n",
"\n",
" train_loss += loss.float()\n",
" lr_scheduler.step()\n",
"\n",
14 changes: 6 additions & 8 deletions llm/peft/lora/roberta_sequence_classification.ipynb
@@ -62,7 +62,7 @@
"from mindnlp.dataset import load_dataset\n",
"from mindnlp.engine import set_seed\n",
"from mindnlp.transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
"from mindnlp.transformers.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.common.optimization import get_linear_schedule_with_warmup\n",
"from mindnlp.peft import (\n",
" get_peft_config,\n",
" get_peft_model,\n",
@@ -462,23 +462,21 @@
}
],
"source": [
"from mindnlp.core import value_and_grad\n",
"def forward_fn(**batch):\n",
" outputs = model(**batch)\n",
" loss = outputs.loss\n",
" return loss\n",
"\n",
"grad_fn = mindspore.value_and_grad(forward_fn, None, tuple(param for param in model.parameters() if param.requires_grad))\n",
"\n",
"def train_step(**batch):\n",
" loss, grads = grad_fn(**batch)\n",
" optimizer.step(grads)\n",
" return loss\n",
"grad_fn = value_and_grad(forward_fn, tuple(param for param in model.parameters() if param.requires_grad))\n",
"\n",
"for epoch in range(num_epochs):\n",
" model.set_train()\n",
" train_total_size = train_dataset.get_dataset_size()\n",
" for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):\n",
" loss = train_step(**batch)\n",
" optimizer.zero_grad()\n",
" loss = grad_fn(**batch)\n",
" optimizer.step()\n",
" lr_scheduler.step()\n",
"\n",
" model.set_train(False)\n",
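Several of the PEFT notebooks above (IA3, LoRA, and this RoBERTa classifier) call the new `value_and_grad` without `attach_grads`, yet still drive the loop with `optimizer.zero_grad()` / `optimizer.step()` and no explicit gradients. That pattern suggests attaching gradients to the parameters is the default behaviour of `mindnlp.core.value_and_grad`, although this is inferred from the diff rather than stated in it. A sketch of the RoBERTa-style variant, where only the parameters with `requires_grad` set (the adapter weights) are differentiated, assuming `model`, `optimizer`, `lr_scheduler`, `train_dataset`, and `num_epochs` exist as in that notebook:

```python
from mindnlp.core import value_and_grad

def forward_fn(**batch):
    # forward pass; loss comes from the model's output object
    outputs = model(**batch)
    return outputs.loss

# restrict gradient computation to the trainable (adapter) parameters
trainable_params = tuple(p for p in model.parameters() if p.requires_grad)
grad_fn = value_and_grad(forward_fn, trainable_params)

for epoch in range(num_epochs):
    model.set_train()
    for step, batch in enumerate(train_dataset.create_dict_iterator()):
        optimizer.zero_grad()
        loss = grad_fn(**batch)   # keyword-style batch, matching the notebook's forward_fn
        optimizer.step()
        lr_scheduler.step()
```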