
Commit

Merge remote-tracking branch 'upstream/main' into undecorate-typecheck
borisfom committed Jun 26, 2024
2 parents 21a5882 + 3371ad5 commit 7a8209a
Showing 64 changed files with 3,821 additions and 336 deletions.
50 changes: 50 additions & 0 deletions .github/workflows/cicd-main.yml
@@ -4185,6 +4185,55 @@ jobs:
AFTER_SCRIPT: |
rm -f examples/asr/evaluation_transcripts.json
L2_Stable_Diffusion_Training:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
with:
RUNNER: self-hosted-azure-gpus-1
SCRIPT: |
rm -rf examples/multimodal/text_to_image/sd_train_results
python examples/multimodal/text_to_image/stable_diffusion/sd_train.py \
trainer.devices=1 \
trainer.max_steps=3 \
+trainer.val_check_interval=10 \
trainer.limit_val_batches=2 \
trainer.gradient_clip_val=0 \
exp_manager.exp_dir=examples/multimodal/text_to_image/sd_train_results \
exp_manager.create_checkpoint_callback=False \
exp_manager.resume_if_exists=False \
model.resume_from_checkpoint=null \
model.precision=16 \
model.micro_batch_size=1 \
model.global_batch_size=1 \
model.first_stage_key=moments \
model.cond_stage_key=encoded \
+model.load_vae=False \
+model.load_unet=False \
+model.load_encoder=False \
model.parameterization=v \
model.load_only_unet=False \
model.text_embedding_dropout_rate=0.0 \
model.inductor=True \
model.inductor_cudagraphs=False \
model.capture_cudagraph_iters=15 \
+model.unet_config.num_head_channels=64 \
+model.unet_config.use_linear_in_transformer=True \
model.unet_config.context_dim=1024 \
model.unet_config.use_flash_attention=null \
model.unet_config.resblock_gn_groups=16 \
model.unet_config.unet_precision=fp16 \
+model.unet_config.timesteps=1000 \
model.optim.name=megatron_fused_adam \
+model.optim.capturable=True \
+model.optim.master_weights=True \
model.optim.weight_decay=0.01 \
model.first_stage_config.from_pretrained=null \
model.data.num_workers=16 \
model.data.synthetic_data=True
AFTER_SCRIPT: |
rm -rf examples/multimodal/text_to_image/sd_train_results
Nemo_CICD_Test:
needs:
#- OPTIONAL_L0_Unit_Tests_GPU
@@ -4279,6 +4328,7 @@ jobs:
- L2_TTS_Fast_dev_runs_1_Mixer-TTS
- L2_TTS_Fast_dev_runs_1_Hifigan
- Speech_Checkpoints_tests
- L2_Stable_Diffusion_Training
if: always()
runs-on: ubuntu-latest
steps:
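As an aside on the L2_Stable_Diffusion_Training job above: overrides prefixed with "+" (for example +model.load_vae=False) add keys that do not exist in the base YAML, while unprefixed overrides must match keys that are already present. A minimal OmegaConf sketch of that distinction, using made-up keys rather than the real Stable Diffusion config:

from omegaconf import OmegaConf, open_dict

# Struct mode (which Hydra enables on composed configs) rejects unknown keys on assignment.
cfg = OmegaConf.create({"model": {"micro_batch_size": 4}})
OmegaConf.set_struct(cfg, True)

cfg.model.micro_batch_size = 1      # existing key: a plain "key=value" override is enough
with open_dict(cfg):                # new key: has to be added explicitly,
    cfg.model.load_vae = False      # which "+model.load_vae=False" signals on the command line

print(OmegaConf.to_yaml(cfg))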
3 changes: 1 addition & 2 deletions Dockerfile
@@ -66,8 +66,7 @@ WORKDIR /workspace/
# We leave it here in case we need to work off of a specific commit in main
RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \
cd Megatron-LM && \
git checkout 36e9b6bf3d8034b10c9bbd9fc357c2df2bd1515c && \
git cherry-pick -n e69187bc3679ea5841030a165d587bb48b56ee77 && \
git checkout 02871b4df8c69fac687ab6676c4246e936ce92d0 && \
pip install .

# Performance optimizations for distributed optimizer: https://github.com/NVIDIA/apex/pull/1771
2 changes: 1 addition & 1 deletion Dockerfile.ci
@@ -34,7 +34,7 @@ WORKDIR /workspace
# Install NeMo requirements
ARG TE_TAG=bfe21c3d68b0a9951e5716fb520045db53419c5e
ARG MODELOPT_VERSION=0.11.0
ARG MCORE_TAG=c90aa1671fc0b97f80fa6c3bb892ce6f8e88e7c9
ARG MCORE_TAG=02871b4df8c69fac687ab6676c4246e936ce92d0
ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
RUN \
--mount=type=bind,source=requirements,target=requirements \
16 changes: 15 additions & 1 deletion README.rst
@@ -45,6 +45,20 @@ Latest News

<details open>
<summary><b>Large Language Models and Multimodal</b></summary>
<details>
<summary>
<a href="https://huggingface.co/models?sort=trending&search=nvidia%2Fnemotron-4-340B">
NVIDIA releases 340B base, instruct, and reward models pretrained on a total of 9T tokens.
</a> (2024-06-18)
</summary>
See documentation and tutorials for SFT, PEFT, and PTQ with
<a href="https://docs.nvidia.com/nemo-framework/user-guide/latest/llms/nemotron/index.html">
Nemotron 340B
</a>
in the NeMo Framework User Guide.
<br><br>
</details>

<details>
<summary>
<a href="https://developer.nvidia.com/blog/nvidia-sets-new-generative-ai-performance-and-scale-records-in-mlperf-training-v4-0/">
@@ -417,7 +431,7 @@ The most recent working versions of these dependencies are here:
export apex_commit=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
export te_commit=bfe21c3d68b0a9951e5716fb520045db53419c5e
export mcore_commit=fbb375d4b5e88ce52f5f7125053068caff47f93f
export mcore_commit=02871b4df8c69fac687ab6676c4246e936ce92d0
export nv_pytorch_tag=24.02-py3
When using a released version of NeMo, please refer to the `Software Component Versions <https://docs.nvidia.com/nemo-framework/user-guide/latest/softwarecomponentversions.html>`_ for the correct versions.
2 changes: 1 addition & 1 deletion docs/source/nlp/quantization.rst
@@ -103,7 +103,7 @@ The TensorRT-LLM engine can be conveniently built and run using ``TensorRTLLM``

.. code-block:: python
from nemo.export import TensorRTLLM
from nemo.export.tensorrt_llm import TensorRTLLM
trt_llm_exporter = TensorRTLLM(model_dir="/path/to/trt_llm_engine_folder")
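For context, a minimal sketch of how the corrected import path is typically exercised to build and run an engine; the export() and forward() calls and their arguments follow the surrounding NeMo export workflow and should be read as an illustration rather than part of this change:

from nemo.export.tensorrt_llm import TensorRTLLM

# Build (or load) a TensorRT-LLM engine under model_dir, then run a quick smoke test.
trt_llm_exporter = TensorRTLLM(model_dir="/path/to/trt_llm_engine_folder")
trt_llm_exporter.export(
    nemo_checkpoint_path="/path/to/quantized_model.qnemo",  # hypothetical checkpoint path
    model_type="llama",                                     # assumed model family
)
print(trt_llm_exporter.forward(["Tell me a short fact about GPUs."]))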
@@ -86,7 +86,7 @@ model:

# LLM configs
# use GPTModel from megatron.core
mcore_gpt: False
mcore_gpt: True

# model architecture
encoder_seq_length: 4096
@@ -149,7 +149,7 @@ model:
bias_activation_fusion: False
megatron_legacy: False

transformer_engine: False
transformer_engine: True
fp8: False # enables fp8 in TransformerLayer forward
fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3
fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID
12 changes: 9 additions & 3 deletions examples/nlp/language_modeling/megatron_gpt_continue_training.py
@@ -115,7 +115,11 @@ def load_from_checkpoint_dir(cls, cfg, trainer, modify_confg_fn):
gpt_cfg = modify_confg_fn(hparams_file.cfg, cfg, add_cfg_to_tree=True)
with tempfile.NamedTemporaryFile(suffix='.yaml') as f:
OmegaConf.save(config=gpt_cfg, f=f.name)
model = cls.load_from_checkpoint(checkpoint_path=checkpoint_path, trainer=trainer, hparams_file=f.name,)
model = cls.load_from_checkpoint(
checkpoint_path=checkpoint_path,
trainer=trainer,
hparams_file=f.name,
)
return model


@@ -141,11 +145,12 @@ def main(cfg) -> None:
gradient_as_bucket_view=cfg.model.gradient_as_bucket_view,
find_unused_parameters=False,
)
precision = cfg.trainer.precision
if cfg.trainer.precision in [16, '16', 'bf16', '16-mixed', 'bf16-mixed']:
scaler = None
if cfg.trainer.precision in [16, '16', '16-mixed']:
scaler = GradScaler(
init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32),
init_scale=cfg.model.get('native_amp_init_scale', 2**32),
growth_interval=cfg.model.get('native_amp_growth_interval', 1000),
hysteresis=cfg.model.get('hysteresis', 2),
)
@@ -156,7 +161,7 @@ def main(cfg) -> None:
plugins.append(MegatronHalfPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler))
else:
plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler))

cfg.trainer.precision = None
if cfg.get('cluster_type', None) == 'BCP':
plugins.append(TorchElasticEnvironment())

@@ -165,6 +170,7 @@ def main(cfg) -> None:
if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
callbacks.append(CustomProgressBar())
trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
cfg.trainer.precision = precision

exp_manager(trainer, cfg.exp_manager)

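The precision handling added above follows a save/clear/restore pattern: remember the configured precision, blank it out in cfg.trainer so the Lightning Trainer does not set up mixed precision on top of the Megatron precision plugins, and put it back afterwards for exp_manager and config dumps. A stripped-down, self-contained sketch of the same idea (the trainer factory below is a placeholder, not pytorch_lightning.Trainer):

from omegaconf import OmegaConf

cfg = OmegaConf.create({"trainer": {"devices": 1, "max_steps": 1, "precision": 16}})

def build_trainer(**trainer_kwargs):
    # Placeholder standing in for Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, ...)
    return dict(trainer_kwargs)

precision = cfg.trainer.precision        # 1) remember the requested precision
cfg.trainer.precision = None             # 2) precision is now owned by the (Megatron) plugin, not the Trainer
trainer = build_trainer(**cfg.trainer)
cfg.trainer.precision = precision        # 3) restore it so later consumers still see the original value

assert trainer["precision"] is None and cfg.trainer.precision == 16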
38 changes: 38 additions & 0 deletions nemo/collections/llm/__init__.py
@@ -13,11 +13,30 @@
SquadDataModule,
)
from nemo.collections.llm.gpt.model import (
CodeGemmaConfig2B,
CodeGemmaConfig7B,
CodeLlamaConfig7B,
CodeLlamaConfig13B,
CodeLlamaConfig34B,
CodeLlamaConfig70B,
GemmaConfig,
GemmaConfig2B,
GemmaConfig7B,
GemmaModel,
GPTConfig,
GPTModel,
Llama2Config7B,
Llama2Config13B,
Llama2Config70B,
Llama3Config8B,
Llama3Config70B,
LlamaConfig,
LlamaModel,
MaskedTokenLossReduction,
Mistral7BConfig,
Mistral7BModel,
MixtralConfig,
MixtralModel,
gpt_data_step,
gpt_forward_step,
)
@@ -31,6 +50,25 @@
"MaskedTokenLossReduction",
"Mistral7BConfig",
"Mistral7BModel",
"MixtralConfig",
"MixtralModel",
"LlamaConfig",
"Llama2Config7B",
"Llama2Config13B",
"Llama2Config70B",
"Llama3Config8B",
"Llama3Config70B",
"CodeLlamaConfig7B",
"CodeLlamaConfig13B",
"CodeLlamaConfig34B",
"CodeLlamaConfig70B",
"LlamaModel",
"GemmaConfig",
"GemmaConfig2B",
"GemmaConfig7B",
"CodeGemmaConfig2B",
"CodeGemmaConfig7B",
"GemmaModel",
"PreTrainingDataModule",
"FineTuningDataModule",
"SquadDataModule",
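A quick, hypothetical smoke test of the widened public API above (class names taken from this diff; it only checks that the new re-exports resolve from the package root):

from nemo.collections.llm import (
    GemmaConfig7B,
    Llama2Config7B,
    Llama3Config8B,
    MixtralConfig,
)

# Nothing model-sized is instantiated here; this just confirms the imports resolve.
for cls in (GemmaConfig7B, Llama2Config7B, Llama3Config8B, MixtralConfig):
    print(cls.__name__)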
23 changes: 7 additions & 16 deletions nemo/collections/llm/api.py
@@ -15,7 +15,7 @@ def train(
trainer: Trainer,
log: Annotated[Optional[NeMoLogger], Config[NeMoLogger]] = None,
resume: Annotated[Optional[AutoResume], Config[AutoResume]] = None,
opt: Optional[OptimizerModule] = None,
optim: Optional[OptimizerModule] = None,
tokenizer: Optional[str] = None,
# TODO: Fix export export: Optional[str] = None,
) -> Path:
@@ -28,7 +28,7 @@ def train(
trainer (Trainer): The trainer instance configured with a MegatronStrategy.
log (NeMoLogger): A nemologger instance.
resume (Optional[Union[AutoResume, Resume]]): Resume training from a checkpoint.
opt (Optional[OptimizerModule]): The optimizer module to be used. If not provided, the default optimizer
optim (Optional[OptimizerModule]): The optimizer module to be used. If not provided, the default optimizer
from the model will be used.
tokenizer (Optional[str]): Tokenizer setting to be applied. Can be 'data' or 'model'.
export (Optional[str]): Filename to save the exported checkpoint after training.
@@ -49,27 +49,18 @@ def train(
>>> train(model, data, trainer, tokenizer='data', source='path/to/ckpt.ckpt', export='final.ckpt')
PosixPath('/path/to/log_dir')
"""
if not isinstance(trainer.strategy, MegatronStrategy):
raise ValueError("Only MegatronStrategy is supported")

_log = log or NeMoLogger()

if tokenizer: # TODO: Improve this
_use_tokenizer(model, data, tokenizer)

app_state = _log.setup(
trainer,
resume_if_exists=getattr(resume, "resume_if_exists", False),
task_config=getattr(train, "__io__", None),
)
if resume is not None:
resume.setup(model, trainer)
if opt:
opt.connect(model)

trainer.fit(model, data)

if hasattr(train, "__io__"):
_save_config_img(app_state.exp_dir, train.__io__)
if optim:
optim.connect(model)
if tokenizer: # TODO: Improve this
_use_tokenizer(model, data, tokenizer)

trainer.fit(model, data)

11 changes: 11 additions & 0 deletions nemo/collections/llm/fn/activation.py
@@ -0,0 +1,11 @@
import torch


@torch.jit.script
def gelu_impl(x):
"""OpenAI's gelu implementation."""
return 0.5 * x * (1.0 + torch.tanh(0.7978845608028654 * x * (1.0 + 0.044715 * x * x)))


def openai_gelu(x):
return gelu_impl(x)
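
A small sanity check for the new activation; the comparison assumes a PyTorch version that provides the tanh-approximate GELU (1.12 or newer), and the tolerance is illustrative:

import torch
from nemo.collections.llm.fn.activation import openai_gelu

x = torch.randn(4, 8)
# 0.7978845608028654 is sqrt(2/pi), so this should match torch's tanh approximation of GELU.
assert torch.allclose(openai_gelu(x), torch.nn.functional.gelu(x, approximate="tanh"), atol=1e-5)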
3 changes: 2 additions & 1 deletion nemo/collections/llm/gpt/data/pre_training.py
@@ -3,6 +3,7 @@

import pytorch_lightning as pl
from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
from torch.utils import data
from torch.utils.data import DataLoader

from nemo.lightning.pytorch.plugins import MegatronDataSampler
@@ -121,7 +122,7 @@ def _create_dataloader(self, dataset, **kwargs) -> DataLoader:
num_workers=self.num_workers,
pin_memory=self.pin_memory,
persistent_workers=self.persistent_workers,
collate_fn=dataset.collate_fn,
collate_fn=getattr(dataset, 'collate_fn', data.dataloader.default_collate),
**kwargs,
)

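The getattr fallback above lets datasets that do not define their own collate_fn fall back to PyTorch's default collation. A minimal standalone sketch of the same pattern with a toy dataset (not the NeMo data module):

import torch
from torch.utils import data
from torch.utils.data import DataLoader, Dataset

class ToyDataset(Dataset):
    # Deliberately defines no collate_fn, so the fallback is taken.
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return torch.tensor([idx, idx + 1])

dataset = ToyDataset()
loader = DataLoader(
    dataset,
    batch_size=4,
    collate_fn=getattr(dataset, "collate_fn", data.dataloader.default_collate),
)
print(next(iter(loader)).shape)  # torch.Size([4, 2])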
41 changes: 41 additions & 0 deletions nemo/collections/llm/gpt/model/__init__.py
@@ -5,13 +5,54 @@
gpt_data_step,
gpt_forward_step,
)
from nemo.collections.llm.gpt.model.gemma import (
CodeGemmaConfig2B,
CodeGemmaConfig7B,
GemmaConfig,
GemmaConfig2B,
GemmaConfig7B,
GemmaModel,
)
from nemo.collections.llm.gpt.model.llama import (
CodeLlamaConfig7B,
CodeLlamaConfig13B,
CodeLlamaConfig34B,
CodeLlamaConfig70B,
Llama2Config7B,
Llama2Config13B,
Llama2Config70B,
Llama3Config8B,
Llama3Config70B,
LlamaConfig,
LlamaModel,
)
from nemo.collections.llm.gpt.model.mistral_7b import Mistral7BConfig, Mistral7BModel
from nemo.collections.llm.gpt.model.mixtral import MixtralConfig, MixtralModel

__all__ = [
"GPTConfig",
"GPTModel",
"Mistral7BConfig",
"Mistral7BModel",
"MixtralConfig",
"MixtralModel",
"LlamaConfig",
"Llama2Config7B",
"Llama2Config13B",
"Llama2Config70B",
"Llama3Config8B",
"Llama3Config70B",
"CodeLlamaConfig7B",
"CodeLlamaConfig13B",
"CodeLlamaConfig34B",
"CodeLlamaConfig70B",
"GemmaConfig",
"GemmaConfig2B",
"GemmaConfig7B",
"CodeGemmaConfig2B",
"CodeGemmaConfig7B",
"GemmaModel",
"LlamaModel",
"MaskedTokenLossReduction",
"gpt_data_step",
"gpt_forward_step",