Skip to content

Commit

Permalink
peft eval directly from ckpt (#6785)
Browse files Browse the repository at this point in the history
* update to load from ckpt

Signed-off-by: arendu <adithya.r@gmail.com>

* update

Signed-off-by: arendu <adithya.r@gmail.com>

* load ckpt peft model

Signed-off-by: arendu <adithya.r@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update style

Signed-off-by: arendu <adithya.r@gmail.com>

---------

Signed-off-by: arendu <adithya.r@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
arendu and pre-commit-ci[bot] authored Jun 1, 2023
1 parent 8672af6 commit 89cbf1d
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ model:
peft:
peft_scheme: "adapter" # must be one of: adapter, ia3, or ptuning
restore_from_path: null
restore_from_ckpt_name: null
restore_from_hparams_path: null

# Used for adapter peft training
adapter_tuning:
Expand Down
32 changes: 26 additions & 6 deletions examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,17 @@ def main(cfg) -> None:

trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer)
if cfg.model.peft.restore_from_path:
peft_model_cfg = MegatronGPTPEFTModel.restore_from(
restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True,
)
if cfg.model.peft.restore_from_path.endswith(".nemo"):
peft_model_cfg = MegatronGPTPEFTModel.restore_from(
restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True,
)
elif cfg.model.peft.restore_from_hparams_path: # not a .nemo model we expect a hparams.yaml file
peft_model_cfg = OmegaConf.to_container(OmegaConf.load(cfg.model.peft.restore_from_hparams_path).cfg)
peft_model_cfg = OmegaConf.create(peft_model_cfg)
# The two statements above extract the dict stored under the `cfg` key and convert it back to a DictConfig,
# so that interpolation works the same way as it does for the config returned by the .restore_from method.
else:
raise RuntimeError("This script requires a .nemo peft model or path to hparams.yaml (and a ckpt path).")
else:
peft_model_cfg = MegatronGPTSFTModel.restore_from(
restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True,
Expand All @@ -127,9 +135,21 @@ def main(cfg) -> None:
cfg.inference.tokens_to_generate = peft_model_cfg.data.test_ds.tokens_to_generate

if cfg.model.peft.restore_from_path:
save_restore_connector = PEFTSaveRestoreConnector(
peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None,
)
if cfg.model.peft.restore_from_path.endswith(".nemo"):
save_restore_connector = PEFTSaveRestoreConnector(
peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None,
)
else:
# attempting to load a ckpt peft model.
if cfg.model.peft.restore_from_ckpt_name:
ckpt_name = cfg.model.peft.restore_from_ckpt_name
else:
ckpt_name = "model_weights.ckpt"
save_restore_connector = PEFTSaveRestoreConnector(
peft_model_nemo_path=None,
peft_model_ckpt_path=cfg.model.peft.restore_from_path,
peft_model_ckpt_name=ckpt_name,
)
else:
save_restore_connector = NLPSaveRestoreConnector()

Expand Down
12 changes: 9 additions & 3 deletions nemo/collections/nlp/parts/nlp_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,14 +404,20 @@ class PEFTSaveRestoreConnector(NLPSaveRestoreConnector):
Args:
peft_model_nemo_path: Used to provide the .nemo file corresponding to a PEFT model (which will only contain a small set of params)
peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFt model. This is required when no .nemo is available (yet) such as during resumed training.
peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFT model. This is required when no .nemo is available (yet) such as during resumed training.
peft_model_ckpt_name: The filename of the ckpt file inside the peft_model_ckpt_path folder
If both peft_model_nemo_path and peft_model_ckpt_path are provided, peft_model_ckpt_path takes precedence.
If neither is provided, PEFT params are initialized at random (not loaded from any external source).
"""

def __init__(self, peft_model_nemo_path: Optional[str] = None, peft_model_ckpt_path: Optional[str] = None) -> None:
def __init__(
self,
peft_model_nemo_path: Optional[str] = None,
peft_model_ckpt_path: Optional[str] = None,
peft_model_ckpt_name: Optional[str] = "model_weights.ckpt",
) -> None:
super().__init__()
self.peft_model_ckpt_name = "model_weights.ckpt"
self.peft_model_ckpt_name = peft_model_ckpt_name
if peft_model_ckpt_path:
# First, try to load from an adapter ckpt path.
# This is given priority over loading from the .nemo path so that training can be resumed.
Expand Down

0 comments on commit 89cbf1d

Please sign in to comment.